diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..125f3aaa --- /dev/null +++ b/.env.example @@ -0,0 +1,19 @@ +BOT_TOKEN=telegram-bot-token +WEBHOOK_URL=https://example.com/telegram/webhook +ALLOWLIST_IDS=12345,67890 + +# Optional settings +BOT_TOKEN_FILE=/path/to/token.txt +BOT_DB_PATH=bot_state.sqlite +STRIX_ROOT=. +BOT_HTTP_HOST=0.0.0.0 +BOT_HTTP_PORT=8081 +BOT_HTTP_TOKEN=changeme +BOT_ALERT_WEBHOOK=https://example.com/alert-endpoint +BOT_RATE_LIMIT=1.0 +BOT_GLOBAL_RATE_LIMIT=0.5 +BOT_DEFAULT_VERBOSITY=high-only +# LLM config for Strix core +STRIX_LLM=gpt-4o +LLM_API_KEY=your-llm-key +LLM_API_BASE=https://api.openai.com/v1 # optional for proxies/custom base diff --git a/README.md b/README.md index 53e5a980..6e83caaa 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,7 @@ pipx install strix-agent # Configure your AI provider export STRIX_LLM="openai/gpt-5" export LLM_API_KEY="your-api-key" +# Alternatively place STRIX_LLM/LLM_API_KEY in a .env file alongside the repo. # Run your first security assessment strix --target ./app-directory @@ -234,10 +235,6 @@ Have questions? Found a bug? Want to contribute? **[Join our Discord!](https://d ## 🌟 Support the Project **Love Strix?** Give us a ⭐ on GitHub! -## πŸ™ Acknowledgements - -Strix builds on the incredible work of open-source projects like [LiteLLM](https://github.com/BerriAI/litellm), [Caido](https://github.com/caido/caido), [ProjectDiscovery](https://github.com/projectdiscovery), [Playwright](https://github.com/microsoft/playwright), and [Textual](https://github.com/Textualize/textual). Huge thanks to their maintainers! - > [!WARNING] > Only test apps you own or have permission to test. You are responsible for using Strix ethically and legally. diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..62500409 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,36 @@ +# Strix Documentation Hub + +Audience: AI and human developers extending Strix. 
Start here to find deep references, workflows, and extension guides. + +- **Quick start**: see `docs/setup-and-running.md`. +- **Architecture**: high-level map in `docs/architecture.md`. +- **Agent loop**: internals in `docs/agent-loop.md`. +- **Tools**: contract and extensions in `docs/tools-and-extensions.md`. +- **Runtime**: sandbox and docker flow in `docs/runtime-and-sandbox.md`. +- **LLM config**: provider setup and tuning in `docs/llm-config.md`. +- **Prompts**: taxonomy and conventions in `docs/prompts.md`. +- **Interface**: CLI/TUI behaviors in `docs/interface.md`. +- **Telemetry**: tracing and events in `docs/telemetry-and-observability.md`. +- **Testing/QA**: strategies in `docs/testing-and-qa.md`. +- **Security/Privacy**: guardrails in `docs/security-and-privacy.md`. +- **Release**: versioning and publishing in `docs/release-and-versioning.md`. +- **Troubleshooting**: fixes in `docs/troubleshooting.md`. +- **Glossary**: definitions in `docs/glossary.md`. +- **Roadmap templates**: RFC/ADR formats in `docs/roadmap-templates.md`. + +Minimum environment +- Python 3.12, Docker running, Playwright browsers installed. +- STRIX_LLM + LLM_API_KEY (or litellm proxy) exported. +- Local write access for `strix_runs/` outputs. + +Flow for new contributors +1) Read `architecture.md` and `agent-loop.md` for mental model. +2) Run a local scan following `setup-and-running.md`. +3) Review `development.md` + `testing-and-qa.md` before changes. +4) Extend tools/prompts/runtime using relevant docs. +5) Update docs and add tests with every feature or bugfix. + +Maintenance +- Keep links valid when files move. +- Update dependency minimums when `pyproject.toml` changes. +- Refresh examples and flags when CLI/TUI arguments change. 
diff --git a/docs/agent-loop.md b/docs/agent-loop.md
new file mode 100644
index 00000000..1fb82114
--- /dev/null
+++ b/docs/agent-loop.md
@@ -0,0 +1,46 @@
+# Agent Loop Internals
+
+Primary files: `agents/base_agent.py`, `agents/state.py`, `agents/StrixAgent/strix_agent.py`, `agents/StrixAgent/system_prompt.jinja`.
+
+## Lifecycle
+1) Agent instantiated with config: llm config, max iterations (default 300; adaptive budget via `agents/iteration_policy.py` based on targets + LLM timeout recorded in tracer), non-interactive flag, optional state/local sources.
+2) `AgentMeta` wires Jinja environment per agent folder for prompts.
+3) `AgentState` tracks messages, tasks, wait states, agent graph ids.
+4) Main loop (in `BaseAgent.run`): fetches/creates state, processes queued messages, updates tracer, calls LLM with prompt, dispatches tool invocations, handles waits/finishes.
+5) Completion when finish tool invoked, max iterations hit, or fatal error.
+6) State persistence: snapshots saved as JSON (default in `strix_runs/<run-name>/<agent-id>_state.json`); can resume via config `load_state_from`.
+
+## Message handling
+- Messages stored in `AgentState`; inter-agent messages include metadata and are added as user messages with delivery notice.
+- Tracer updates agent status on message receipt/resume.
+
+## Tool selection and execution
+- LLM output parsed for tool calls → `tools.process_tool_invocations` → `tools/executor.py`.
+- Tool executions logged via tracer, results added back into state/context for next iteration.
+
+## Memory and limits
+- `llm/memory_compressor.py` trims context to fit provider limits.
+- `llm/request_queue.py` manages concurrency/ordering; `llm/llm.py` handles retries/backoff (tenacity).
+- Configurable `max_iterations` per agent instance via config.
+
+## Error handling
+- LLM errors wrapped in `LLMRequestFailedError`; retries applied.
+- Tool errors logged and surfaced in state; agents can adapt prompts accordingly.
+ +## Vulnerability propagation +- Tracer `vulnerability_found_callback` (set in CLI) renders findings immediately; tracer records IDs, severity, content. + +## Extending the agent +- Adjust prompts in `agents/StrixAgent/system_prompt.jinja`. +- Modify decision logic in `StrixAgent/strix_agent.py`. +- Add new state fields carefully; ensure serialization if persisted; update tracer calls to include new metadata. + +## ASCII loop snapshot +``` +State -> Prompt render -> LLM -> Tool calls -> Results -> State update + ^ | + +--------------------Tracer/events------------------+ +``` + +## Maintenance +- Revise when state fields or loop control change; keep diagram aligned with actual steps; update when new hooks are added. diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 00000000..6e0a5ffe --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,50 @@ +# Architecture + +Strix runs coordinated agents that drive tools inside a dockerized sandbox, orchestrated via CLI/TUI, with telemetry and prompt packs guiding behavior. + +## System map +- Entry: CLI/TUI (`interface/main.py`, `cli.py`, `tui.py`) parses args, sets scan config, starts tracer callbacks. +- Agent loop: `agents/base_agent.py`, `agents/state.py`, `agents/StrixAgent/strix_agent.py` manage iterations, messages, tool calls, memory compression. +- Tools: `tools/*` define XML action schemas + Python implementations; `tools/registry.py` registers; `tools/executor.py` dispatches; `interface/tool_components/*` render outputs. +- Runtime: `runtime/docker_runtime.py`, `runtime/tool_server.py`, `runtime/runtime.py` manage sandbox containers and tool execution endpoints. +- LLM: `llm/llm.py`, `llm/config.py`, `llm/request_queue.py`, `llm/memory_compressor.py` handle provider routing, retries, queueing, token budgeting. +- Prompts: `prompts/**/*.jinja`, `agents/StrixAgent/system_prompt.jinja`, `prompts/coordination/root_agent.jinja` supply structured instructions. 
+- Telemetry: `telemetry/tracer.py` captures agent lifecycle, tool executions, vulnerabilities; `interface/utils.py` renders stats. +- Outputs: run artifacts under `strix_runs/` (reports/logs). + +## Data flow (simplified) +1) User invokes `strix` -> `interface/main.py` builds args, `cli.py`/`tui.py` start UI. +2) Scan config + tracer created -> `StrixAgent` instantiated with `LLMConfig`. +3) Agent loop requests LLM completions; responses trigger tool invocations via `tools/executor.py`. +4) Tools call `runtime/tool_server.py` (docker sandbox) for side effects (browser, proxy, terminal, python, file edits, etc.). +5) Tool results and tracer events propagate to UI renderers; vulnerabilities emitted to console and saved. +6) Loop continues until max iterations, finish action, or user stop; results stored in `strix_runs/`. + +## ASCII data flow +``` +CLI/TUI -> Tracer -> StrixAgent -> LLM -> Tool Executor -> Runtime (Docker) -> Tool Server + ^ | + |-------------------------------------------+ +``` + +## Extension seams +- Add tools: new folder under `tools/`, schema XML, implementation, registry entry, renderer in `interface/tool_components/`. +- Add prompts: new Jinja in `prompts/*` or agent prompt folder; wire selection logic where consumed. +- Add telemetry: emit via `telemetry/tracer.py` helper methods; extend UI renderers to display. +- Add providers: extend `llm/config.py` + `llm/llm.py` to create client, auth, and request path. +- Adjust runtime: modify `runtime/docker_runtime.py` for images/limits, `tool_server.py` for endpoints. + +## Persistence +- Runs: `strix_runs/` contains reports and logs (non-interactive mode prints to stdout too). +- Agent graph: managed in-memory via `tools/agents_graph/agents_graph_actions.py`, rendered by interface. Graph definitions can be loaded/validated from YAML/JSON via `agents/graph_builder.py` (unique ids, single root, parent/child checks) before instantiation. 
+ +## Non-interactive mode +- Enabled via `-n/--non-interactive`; suppresses interactive UI, streams findings to stdout; still uses tracer callbacks for vulnerability events. + +## Reliability and limits +- Max iterations default 300 (`BaseAgent.max_iterations`), configurable via agent config. +- Request queue/backoff in `llm/request_queue.py`; retries in `llm/llm.py` using tenacity. +- Memory compression in `llm/memory_compressor.py` to stay within context limits. + +## Maintenance +- Update module paths if files move; refresh diagram when flows change; align with CLI flag changes. diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 00000000..abe13a4a --- /dev/null +++ b/docs/development.md @@ -0,0 +1,46 @@ +# Development Guide + +## Layout primer +- Core agent logic: `strix/agents/*` +- Tools and action schemas: `strix/tools/*` +- Runtime sandbox: `strix/runtime/*` +- LLM layer: `strix/llm/*` +- Prompts: `strix/prompts/*` and `strix/agents/StrixAgent/system_prompt.jinja` +- Interface (CLI/TUI + renderers): `strix/interface/*` +- Telemetry: `strix/telemetry/*` + +## Standards +- Python 3.12, strict typing (see `pyproject.toml` mypy config). +- Lint/format: `ruff`, `black`, `isort`. +- Security/static: `bandit`, `pylint`. +- Keep docstrings concise; prefer clear variable names over comments. + +## Commands +```bash +# Format + lint +poetry run ruff check . +poetry run black . +poetry run isort . + +# Type check +poetry run mypy . +poetry run pyright + +# Tests +poetry run pytest +poetry run pytest --cov +``` + +## Workflow +- Create feature branches; keep commits scoped. +- Run format + lint + tests before PR. +- Update relevant docs when changing behavior, flags, prompts, or outputs. +- Add regression coverage for new tools/prompt changes/runtime adjustments. + +## Performance tips +- Reuse docker images; avoid repeated pulls. +- Cache provider auth where possible; tune LLM parallelism in config. 
+- Use smaller prompt packs for targeted testing when iterating quickly.
+
+## Maintenance
+- Revise commands/tools when linters/types/test stacks change; align with `pyproject.toml`.
diff --git a/docs/glossary.md b/docs/glossary.md
new file mode 100644
index 00000000..49e0cd26
--- /dev/null
+++ b/docs/glossary.md
@@ -0,0 +1,15 @@
+# Glossary
+
+- Agent loop: Iterative cycle in `agents/base_agent.py` driving LLM calls and tool executions.
+- Agent graph: In-memory map of agents managed via `tools/agents_graph/agents_graph_actions.py`, rendered in UI.
+- Action schema: XML definition (`*_actions_schema.xml`) describing tool actions/args.
+- Renderer: UI component in `interface/tool_components/*` mapping tool outputs to panels.
+- Runtime: Docker-based sandbox managed by `runtime/docker_runtime.py` and `tool_server.py`.
+- Tracer: Telemetry recorder in `telemetry/tracer.py` logging agent and tool events.
+- Run name: Unique id for a scan; names output directory `strix_runs/<run-name>/`.
+- Request queue: LLM request coordinator in `llm/request_queue.py`.
+- Memory compressor: Context trimming utility in `llm/memory_compressor.py`.
+- Non-interactive mode: Headless CLI mode (`-n`) emitting findings to stdout without TUI.
+
+## Maintenance
+- Add new terms as components are introduced; keep paths accurate after refactors.
diff --git a/docs/interface.md b/docs/interface.md
new file mode 100644
index 00000000..7f5db109
--- /dev/null
+++ b/docs/interface.md
@@ -0,0 +1,30 @@
+# Interface (CLI/TUI)
+
+## Entrypoints
+- `interface/main.py`: CLI entry, argument parser wiring, dispatch to CLI/TUI.
+- `interface/cli.py`: non-interactive flow; renders startup panel and vulnerability panels via tracer callbacks.
+- `interface/tui.py`: textual-based interactive UI; panels for tools, logs, stats.
+- `interface/utils.py`: stats builders, severity colors, shared helpers.
+
+## Arguments (key)
+- `--target/-t`: target path/URL (multi allowed).
+- `--instruction`: freeform guidance to agent.
+- `--non-interactive/-n`: headless mode; prints findings to stdout. +- `--run-name`: optional custom run id (otherwise generated). +- Provider/env vars read separately; ensure `STRIX_LLM`/`LLM_API_KEY` set. + +## Rendering +- Tool outputs mapped via `interface/tool_components/*` renderers (browser, proxy, terminal, file edits, reports, notes, thinking, etc.). +- Live stats and final stats built in `utils.py` and displayed in panels. +- Vulnerabilities emitted from tracer callback in CLI mode; TUI shows panes with updates. + +## Customization +- Styles in `interface/assets/tui_styles.tcss`. +- Add new renderers by extending `tool_components/base_renderer.py` and registering in `registry.py`. + +## Non-interactive behavior +- Skips TUI; logs findings immediately. +- Still writes outputs under `strix_runs/`. + +## Maintenance +- Update argument list when CLI flags change; refresh renderer mapping when new tools are added. diff --git a/docs/llm-config.md b/docs/llm-config.md new file mode 100644 index 00000000..db809ee3 --- /dev/null +++ b/docs/llm-config.md @@ -0,0 +1,33 @@ +# LLM Configuration + +Core files: `llm/config.py`, `llm/llm.py`, `llm/request_queue.py`, `llm/memory_compressor.py`, `llm/utils.py`. + +## Providers and models +- `LLMConfig` defines provider/model id (e.g., `openai/gpt-5`) and auth. +- Extend providers by adding client setup and request paths in `llm/llm.py`; expose config knobs in `config.py`. + +## Request handling +- Requests queued via `request_queue.py` to manage concurrency and order. +- Retries/backoff handled in `llm.py` using tenacity; errors wrapped in `LLMRequestFailedError`. +- Streaming support depends on provider implementation in `llm.py`. + +## Context management +- `memory_compressor.py` trims conversation/state to fit provider token limits. +- `llm/utils.py` cleans content before sending to providers. + +## Tuning +- Control parallelism and rate limits in request queue. +- Adjust model choice to balance cost vs. latency. 
+- Customize temperature/other params in `LLMConfig`. + +## Telemetry +- LLM calls can be logged via tracer; ensure sensitive data is redacted before emission. + +## Adding a new provider +1) Define config fields in `config.py`. +2) Add client creation and request method in `llm.py`. +3) Wire retries/backoff and error normalization. +4) Update docs and examples in `setup-and-running.md`. + +## Maintenance +- Revise when providers/models or retry/queue logic change; ensure env var expectations are documented. diff --git a/docs/prompts.md b/docs/prompts.md new file mode 100644 index 00000000..daa098fa --- /dev/null +++ b/docs/prompts.md @@ -0,0 +1,33 @@ +# Prompts + +## Taxonomy +- Coordination: `prompts/coordination/root_agent.jinja` +- Frameworks: `prompts/frameworks/*.jinja` (e.g., `fastapi`, `nextjs`) +- Technologies: `prompts/technologies/*.jinja` (e.g., `firebase_firestore`, `supabase`) +- Vulnerabilities: `prompts/vulnerabilities/*.jinja` (e.g., `sql_injection`, `xss`, `rce`) +- Auth playbooks: `prompts/auth/oidc_saml_sso.jinja` +- Cloud/custom/recon placeholders: `prompts/cloud`, `prompts/custom`, `prompts/reconnaissance` +- Agent system prompt: `agents/StrixAgent/system_prompt.jinja` + +## Conventions +- Jinja templates with explicit placeholders; avoid hidden assumptions. +- Keep titles and sections consistent for downstream parsing. +- Prefer actionable guidance (steps, checks, PoC ideas) and explicit do/don’t lists. + +## Selection/combination +- Agent selects relevant prompt packs based on target metadata; templates rendered via Jinja environment set in `AgentMeta`. +- Root coordination prompt guides multi-agent behavior; specialized prompts augment depending on framework/tech/vuln focus. + +## Safe testing +- Dry-run new prompts in non-interactive mode against test targets. +- Check for prompt injection surfaces; ensure instructions avoid unsafe actions outside sandbox. 
+- Validate output format expected by tools (e.g., when tool calls must be produced). + +## Adding a prompt pack +1) Create `.jinja` file in appropriate folder with descriptive name. +2) Document variables required; keep defaults sensible. +3) Add regression test or fixture to ensure rendering works and key strings exist. +4) Update this doc and any selection logic if needed. + +## Maintenance +- Refresh taxonomy when adding/removing prompt packs; ensure variable names stay consistent with agent code. diff --git a/docs/release-and-versioning.md b/docs/release-and-versioning.md new file mode 100644 index 00000000..6667b205 --- /dev/null +++ b/docs/release-and-versioning.md @@ -0,0 +1,26 @@ +# Release and Versioning + +Current version: 0.4.0 (`pyproject.toml`). + +## Versioning +- Follow semantic-ish bumps: increment patch for fixes, minor for features, major for breaking changes. +- Update `pyproject.toml` version and any surfaced docs. + +## Packaging +```bash +poetry build +poetry publish # requires credentials +``` +- Ensure `README.md` and license included (listed in `[tool.poetry]` include). +- Verify wheels/sdist contain `.jinja`, `.xml`, `.tcss` assets (declared in `include`). + +## Changelog +- Maintain a changelog (add file if missing) summarizing features, fixes, breaking changes. +- Reference PRs/issues; highlight security-impacting changes. + +## Compatibility +- Python 3.12 only (per `pyproject.toml`). +- Document any deprecated flags or behaviors and provide migration notes. + +## Maintenance +- Update version numbers and commands when packaging flow changes; ensure asset include lists stay correct. 
diff --git a/docs/roadmap-templates.md b/docs/roadmap-templates.md new file mode 100644 index 00000000..e475b43d --- /dev/null +++ b/docs/roadmap-templates.md @@ -0,0 +1,37 @@ +# Roadmap and Templates + +## RFC/ADR template +``` +Title: +Status: Draft/Approved/Rejected +Owner: +Date: +Summary: +Context: +Options considered: +Decision: +Impact (security/latency/cost/UX): +Migration plan: +Testing plan: +Open questions: +``` + +## Backlog item template +``` +ID: +Title: +Area: agent | tool | runtime | prompt | UI | telemetry | infra +Problem/Goal: +Proposed approach: +Risks: +Acceptance criteria: +Tests required: +Docs to update: +``` + +## Prioritization hints +- Prefer items that improve security signal quality, reduce latency/cost, or harden sandboxing. +- Require tests/docs updates before closing any item. + +## Maintenance +- Adjust templates when process changes; keep evaluation criteria aligned with current priorities. diff --git a/docs/runtime-and-sandbox.md b/docs/runtime-and-sandbox.md new file mode 100644 index 00000000..8942a75a --- /dev/null +++ b/docs/runtime-and-sandbox.md @@ -0,0 +1,30 @@ +# Runtime and Sandbox + +## Components +- `runtime/docker_runtime.py`: manages docker container lifecycle and tool execution environment. +- `runtime/tool_server.py`: server exposing tool execution endpoints inside sandbox. +- `runtime/runtime.py`: runtime interface/wrapper. + +## Flow +1) Tool invocation requests execution via runtime. +2) Docker runtime ensures container image exists/runs, mounts required volumes, and proxies commands. +3) Tool server executes requested action (browser, terminal, python, etc.) within sandbox. +4) Results returned to agent loop and tracer. + +## Security boundaries +- Isolation via Docker: filesystem and network scoped by container config. +- Volume mounts only for necessary paths (e.g., target code) to minimize exposure. +- Network access governed by docker configuration; prefer least privilege. 
+ +## Configurable parameters +- Image name/tag, resource limits (CPU/memory), timeouts for actions, mount paths. +- Adjust in `docker_runtime.py` and related config constants. + +## Troubleshooting +- Docker daemon not running β†’ start service. +- Permission issues pulling/running image β†’ check user group and registry auth. +- Slow pulls β†’ pre-pull images; configure registry mirror. +- Tool server unreachable β†’ check container logs and exposed ports; verify tool_server start. + +## Maintenance +- Update when docker image/tag or resource limits change; keep security boundary notes aligned with actual mounts/network settings. diff --git a/docs/security-and-privacy.md b/docs/security-and-privacy.md new file mode 100644 index 00000000..c43df2ff --- /dev/null +++ b/docs/security-and-privacy.md @@ -0,0 +1,25 @@ +# Security and Privacy + +## Secrets and data handling +- Use env vars for provider keys (`STRIX_LLM`, `LLM_API_KEY`); avoid hardcoding. +- Redact sensitive content before telemetry/logging; tracer extensions should strip secrets. + +## Threat model +- Targets may be untrusted; sandbox all active tooling via docker runtime. +- Limit volume mounts to required paths; constrain network access where possible. +- Validate tool inputs; sanitize file paths and URLs from LLM output. + +## Sandbox caveats +- Misconfigured docker (privileged mounts) can weaken isolation; review `runtime/docker_runtime.py` changes carefully. +- Browser/proxy tools can reach external hosts; ensure user consent and scope limitations. + +## Supply chain +- Pin dependencies in `pyproject.toml`; review updates for security impact. +- Verify docker images and registries; avoid pulling untrusted images. + +## Privacy +- Minimize data sent to LLMs; prefer summaries over raw sensitive payloads. +- Provide users clarity on what leaves the machine when using remote providers. 
+
+## Maintenance
+- Revisit threat model when runtime/networking or tool capabilities change; ensure redaction guidance matches telemetry behavior.
diff --git a/docs/setup-and-running.md b/docs/setup-and-running.md
new file mode 100644
index 00000000..d17f0d5e
--- /dev/null
+++ b/docs/setup-and-running.md
@@ -0,0 +1,103 @@
+# Setup and Running
+
+## Prerequisites
+- Python 3.12
+- Docker running with permissions to pull/run images.
+- Playwright browsers installed (one-time): `python -m playwright install --with-deps`.
+- Network access to chosen LLM provider or litellm proxy.
+
+## Install
+```bash
+# Recommended
+pipx install strix-agent
+
+# From source
+poetry install
+poetry run strix --help
+```
+
+## Environment variables
+- `STRIX_LLM`: provider/model id (e.g., `openai/gpt-5`).
+- `LLM_API_KEY`: API key for the provider or proxy.
+- Optional provider settings: set according to `llm/config.py` expectations (e.g., base URL for litellm proxy).
+
+## Running scans
+- Basic local scan: `strix --target ./app-directory`
+- Remote repo: `strix --target https://github.com/org/repo`
+- Web app: `strix --target https://your-app.com`
+- Multiple targets: `strix -t <target1> -t <target2>`
+- Add instructions: `--instruction "Focus on IDOR"`
+- Non-interactive mode (for servers/CI): `strix -n --target https://your-app.com`
+
+## Outputs
+- Results saved under `strix_runs/<run-name>/`; includes reports/logs.
+- Structured vulnerability exports in each run: `vulnerabilities/*.md`, `vulnerabilities.csv`, `vulnerabilities.jsonl`, and `vulnerabilities.sarif.json` for CI upload.
+- Non-interactive mode also streams findings to stdout.
+
+## Common pitfalls
+- Docker not running → ensure daemon is up and user has permissions.
+- Playwright missing → rerun `python -m playwright install --with-deps`.
+- Invalid/missing `LLM_API_KEY` → verify env; check provider-specific base URL if using proxy.
+- Slow runs → confirm network connectivity and increase provider timeout if needed in config.
+ +## Telegram bot (sidecar, planned) +- Env: `BOT_TOKEN`, `WEBHOOK_URL`, `ALLOWLIST_IDS`, optional `BOT_DB_PATH`, `STRIX_ROOT`. +- Deploy bot service on same VM with access to `strix_runs/`; ensure webhook HTTPS endpoint reachable. +- Commands and usage: see `docs/telegram_bot_usage.md`. +- Architecture and security notes: see `docs/telegram_bot_architecture.md`. +- Start bot: `poetry run strix-bot --mode strix` (or `--mode fs` for read-only browsing). +- Supports `.env` in repo root; see `.env.example` for all keys (env vars still take priority). +- Systemd unit template: `packaging/systemd/strix-bot.service` (copy to `/etc/systemd/system/` and adjust paths/user/env). +- Health endpoints (if `BOT_HTTP_PORT` set): `/health` and `/healthz` return `ok`; `/metrics` returns JSON or Prometheus when `?format=prom`. +- Optional tuning: `BOT_RATE_LIMIT` (per-user seconds), `BOT_GLOBAL_RATE_LIMIT` (seconds), `BOT_DEFAULT_VERBOSITY` (high-only|batched|full). +- Sample systemd (adjust paths/env): + ``` + [Unit] + Description=Strix Telegram Bot + After=network.target + + [Service] + WorkingDirectory=/opt/strix + Environment=BOT_TOKEN=... + Environment=WEBHOOK_URL=https://your-domain/bot-webhook + Environment=ALLOWLIST_IDS=12345,67890 + Environment=BOT_HTTP_PORT=8081 + Environment=BOT_HTTP_HOST=0.0.0.0 + ExecStart=/usr/bin/poetry run strix-bot --mode strix + Restart=always + + [Install] + WantedBy=multi-user.target + ``` +- Security: prefer injecting `BOT_TOKEN` via secret manager or systemd drop-in rather than files; rotate tokens regularly (mount secret to `BOT_TOKEN_FILE` if using file-based secret). +- Secure HTTP endpoints (`/health`, `/healthz`, `/metrics`) with firewall/allowlist if enabled. +- Use `BOT_HTTP_TOKEN` to require bearer auth on `/metrics`; rotate regularly. +- CI: add a job to run `poetry run pytest` to cover config, streaming filters, HTTP endpoints. +- A ready-made workflow exists at `.github/workflows/bot-tests.yml` to run bot tests in CI. 
+- Alerts: set `BOT_ALERT_WEBHOOK` to receive JSON alerts on bot handler/delivery failures. +- `.env` also supports core LLM settings (`STRIX_LLM`, `LLM_API_KEY`, optional `LLM_API_BASE`). +- Example GitHub Actions job: + ```yaml + bot-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install deps + run: | + pip install poetry + poetry install --with dev + - name: Test + run: poetry run pytest tests + ``` +- Secret management example (systemd drop-in): + ``` + # /etc/systemd/system/strix-bot.service.d/10-secret.conf + [Service] + EnvironmentFile=/etc/strix-bot/secret.env # contains BOT_TOKEN=... + ``` + +## Maintenance +- Update commands when CLI flags change; refresh provider env guidance when new providers added. diff --git a/docs/telegram_bot_announcement.md b/docs/telegram_bot_announcement.md new file mode 100644 index 00000000..a8eccf0b --- /dev/null +++ b/docs/telegram_bot_announcement.md @@ -0,0 +1,26 @@ +# Telegram Bot Announcement (Internal) + +Audience: engineering/operators. Goal: share availability, how to use, and safety notes. + +## Key points +- What: Strix Telegram bot to start/stop runs, stream findings, fetch reports/files, and browse docs. +- Where: staging/prod bot at your usual Telegram; access controlled by allowlist (`ALLOWLIST_IDS`). +- Status: pilot complete; ready for wider use. Resume remains unsupported (will reply with guidance). + +## How to start +1) Ensure you are allowlisted; ask the ops team if not. +2) Use `/start` for help, `/health` to confirm it’s alive. +3) Launch a run: `/newrun https://target --instruction "focus on auth"`. +4) During the run: adjust verbosity `/verbosity batched|full`, tail logs `/run tail`, view summary `/run report`, and fetch files `/run files`. +5) Get docs: `/docs troubleshooting` or `/run docs`. + +## Safety +- Allowlist enforced; tokens via env/secret manager (`BOT_TOKEN`/`BOT_TOKEN_FILE`). 
+- Redaction on streaming; reports/files are sent as-isβ€”avoid sensitive targets unless approved. +- Size/path guards on file browsing; rate limits configurable via env. +- Optional HTTP `/health`/`/metrics` secured by host/IP + `BOT_HTTP_TOKEN` if set. + +## Support +- Issues: check `/health`, service logs, and `/metrics?format=prom` (with token if configured). +- Alerts: delivery/handler failures are emitted to `BOT_ALERT_WEBHOOK` when configured. +- Docs: `docs/telegram_bot_usage.md`, `docs/telegram_bot_troubleshooting.md` (or `/docs troubleshooting`). diff --git a/docs/telegram_bot_architecture.md b/docs/telegram_bot_architecture.md new file mode 100644 index 00000000..76e9e312 --- /dev/null +++ b/docs/telegram_bot_architecture.md @@ -0,0 +1,48 @@ +# Telegram Bot Architecture (aiogram + webhook, sidecar VM) + +## Overview +- Transport: Webhook (HTTPS endpoint) to receive Telegram updates. +- Bot runtime: aiogram app running as a systemd-managed service on the same VM as Strix (sidecar). +- Control surface: internal Python API layer that starts/stops/resumes/lists runs without spawning CLI subprocesses. +- Auth: allowlisted Telegram user IDs gate all commands; secrets via env/secret manager. +- Persistence: SQLite for bot session state (pagination, recent runs). +- Filesystem access: direct read access to `strix_runs/` for reports/artifacts. + +## Data/Control Flow +``` +Telegram -> Webhook endpoint (aiogram) -> Command/handler + -> Auth check (allowlist) + -> Control API (start/stop/resume/list/tail) -> Strix internals + -> Tracer hooks stream vulns/logs -> Bot push to Telegram + -> FS access to strix_runs -> send summaries/files/docs +``` + +## Key Components +- Webhook handler: receives updates, routes to aiogram routers. +- Command handlers: `/newrun`, `/runs`, `/run info/tail/report/files/docs`, `/resume`, `/stop`, `/verbosity`, `/docs`, `/help`. +- Inline keyboards: quick actions for reports/files/tail/verbosity. 
+- Control API: Python functions wrapping Strix interfaces (no CLI spawn) to manage runs and fetch status. +- Telemetry bridge: tracer callbacks batch/format vulnerability events and push to Telegram respecting verbosity (high-only/batched/full) with severity icons. +- File serving: safe path resolver for `strix_runs/` artifacts; enforce size limits and compression. +- Docs serving: excerpts/links from `docs/*.md`, optional contextual suggestions after errors. +- Rate limiting: global/severity-based throttling of outbound messages. +- Security: allowlist check per request; redaction by default; explicit override for sensitive data. +- Alerting: optional webhook receives JSON payloads when delivery/handler errors occur. +- Tunables: per-user/global rate limits and default verbosity via env (`BOT_RATE_LIMIT`, `BOT_GLOBAL_RATE_LIMIT`, `BOT_DEFAULT_VERBOSITY`). +- Config loading: `.env` in repo root supported; env vars override file values. + +## Deployment +- Systemd unit running aiogram app with webhook URL + token via env. +- Health check endpoint (HTTP/ping) for monitoring. +- Optional HTTP server (if BOT_HTTP_PORT set) exposing `/health`, `/healthz`, and `/metrics` (in-memory counters/errors). +- TLS termination via reverse proxy or direct cert; ensure webhook set to HTTPS URL. + +## Observability +- Structured logs for commands, control API calls, run starts, report sends, and file transfers. +- Metrics (commands, errors, latency, message volume); `/metrics` supports JSON or Prom text format and can be scraped by Prometheus. +- Alerting on delivery/API failures once metrics exist. + +## Maintenance Notes +- Update allowlist when adding operators. +- Rotate bot token regularly; store in secret manager/env. +- Re-run webhook set-up if URL/cert changes. 
diff --git a/docs/telegram_bot_e2e_plan.md b/docs/telegram_bot_e2e_plan.md new file mode 100644 index 00000000..2fe75ad4 --- /dev/null +++ b/docs/telegram_bot_e2e_plan.md @@ -0,0 +1,32 @@ +# Telegram Bot E2E Staging Plan + +Purpose: validate the Telegram bot end-to-end in a staging chat before wider rollout. + +## Preconditions +- Staging Telegram chat exists; operators are allowlisted (`ALLOWLIST_IDS`). +- Bot deployed to staging VM with webhook HTTPS reachable; `BOT_TOKEN`, `WEBHOOK_URL`, `STRIX_ROOT` set. +- Sample target app available for scans (non-production, safe to probe). +- Optional: `BOT_HTTP_PORT` exposed to staging network for `/health`/`/metrics`. + +## Test steps +1) Sanity: send `/health` and `/start`; expect `ok` and help text. +2) Start run: `/newrun --instruction "smoke"`; expect acknowledgment with run id. +3) Streaming: during run, confirm vulnerability messages arrive with severity icons; switch verbosity `/verbosity batched` then `full` and observe differences. +4) Tail logs: `/run tail` then press "Tail more" until end; ensure pagination stops. +5) Report: `/run report` -> receive summary; press "Send full report" -> receive file. +6) Files: `/run files` -> navigate directories; download a small file; confirm size guard blocks oversized file if present. +7) Docs: `/run docs` or `/docs troubleshooting` -> receive excerpt/link. +8) Search runs: `/runs <query>` -> list filtered runs; open run info via buttons. +9) Stop: `/stop <run_id>` on a live run -> confirm stop message. +10) Metrics/health: curl `/health` and `/metrics?format=prom` (with token if set); ensure non-200 failures are alerted/logged. + +## Pass criteria +- All commands respond within a few seconds; no unhandled errors in bot logs. +- Streaming respects verbosity and redaction; batching does not exceed Telegram limits. +- File/report sends succeed or are gracefully blocked with messaging. +- `/metrics` exposes counters/errors; `/health` returns `ok`.
+ +## Post-test +- Capture transcripts/screenshots. +- File any bugs with run id, timestamps, and screenshots. +- Clear staging secrets if rotated for the test. diff --git a/docs/telegram_bot_pilot.md b/docs/telegram_bot_pilot.md new file mode 100644 index 00000000..c2f9dfd5 --- /dev/null +++ b/docs/telegram_bot_pilot.md @@ -0,0 +1,33 @@ +# Telegram Bot Pilot & Rollout + +Plan for piloting with allowlisted users, collecting feedback, and rolling out. + +## Pilot setup +- Scope: staging or low-risk production targets; keep allowlist small (operators only). +- Config: `BOT_TOKEN`, `WEBHOOK_URL`, `ALLOWLIST_IDS`, optional `BOT_ALERT_WEBHOOK` for failures. +- Ensure `/health` and `/metrics` reachable to operators (behind firewall/auth as needed). +- Share usage doc: `docs/telegram_bot_usage.md`; remind users about redaction/limits. + +## Pilot checklist (1–2 weeks) +- Create at least 3 runs across different targets. +- Validate streaming at all verbosity levels. +- Fetch reports/files and exercise size guards. +- Use `/docs troubleshooting` after an induced error to verify hints. +- Capture any delivery/API errors (check alerts and bot logs). +- Track UX notes: confusing messages, missing buttons, verbosity defaults. + +## Feedback and hardening +- Review pilot feedback; categorize into bugs vs. UX tweaks. +- Adjust rate limits, default verbosity, or button labels based on feedback. +- Add missing docs/tests from pilot findings. + +## Rollout +- Expand allowlist; announce availability in internal channels with brief "how to". +- Ensure CI workflow (`.github/workflows/bot-tests.yml`) is green. +- Monitor `/metrics` errors and alert webhook for the first week; be ready to toggle bot off via systemd. +- Update README/marketing if broader audience is desired. + +## Exit criteria +- No critical delivery/API errors in pilot week. +- Users can start runs, receive streaming updates, fetch reports/files, and browse docs without intervention.
+- Health/metrics endpoints monitored; alert webhook functioning if configured. diff --git a/docs/telegram_bot_questions.md b/docs/telegram_bot_questions.md new file mode 100644 index 00000000..02ee730b --- /dev/null +++ b/docs/telegram_bot_questions.md @@ -0,0 +1,105 @@ +# Telegram Bot Design Questions (with suggested options) + +Answer these to lock UX and structure choices. + +1) Transport mode? +- [x] A) Webhook: stable and low-latency; requires public ingress/HTTPS endpoint and cert handling. +- [ ] B) Long-polling: simplest (no ingress/certs), but slightly higher latency and less efficient at scale. +- Recommendation: A (webhook) if ingress available; otherwise B. + +2) Telegram library? +- [ ] A) python-telegram-bot: mature, batteries-included, good docs; synchronous + async support. +- [x] B) aiogram: fully async, performant, modular; leaner API, good for high throughput. +- [ ] C) Other: specify if you prefer another ecosystem (e.g., Telethon) with trade-offs. +- Recommendation: B (aiogram) for async throughput; A if you want maximal examples/docs. + +3) Authentication/authorization? +- [x] A) Allowlisted user IDs: simplest hard gate; manage allowed IDs in config/secret. +- [ ] B) Shared secret/passphrase + allowlist: extra challenge step to join; good for small teams. +- [ ] C) Org invite flow: needs backing service/DB to manage org membership; most overhead. +- [ ] D) No auth: fastest but insecure; not recommended. +- Recommendation: B (secret + allowlist) for extra friction; A if single-user. + +4) Hosting model? +- [x] A) Sidecar container with Strix: co-located, easy FS access to `strix_runs`, simple networking. +- [ ] B) Separate microservice: clean separation, can scale independently; needs API into Strix. +- [ ] C) Single binary embedding Strix: lowest moving parts, but couples release cycles tightly. +- Recommendation: A (sidecar) to simplify FS access and reduce latency. + +5) Strix control interface for bot? 
+- [ ] A) Wrap CLI invocations: minimal code change; slower startup per run; manage subprocess IO. +- [x] B) Internal Python API: direct calls; faster, richer control; requires stable internal surface. +- [ ] C) Lightweight HTTP API: bot calls a service exposing run control; clearer boundaries, extra service to maintain. +- Recommendation: B for performance and richness; C if you want clearer service boundary. + +6) Command style? +- [ ] A) Slash commands only: clear and predictable; more typing, fewer affordances. +- [x] B) Slash commands + inline keyboards: best UX; quick actions, contextual buttons. +- [ ] C) Menu-driven persistent buttons: fewer typed commands; more state/UI logic to maintain. +- Recommendation: B for balance of clarity and UX. + +7) Output verbosity to chat? +- [ ] A) Only high-severity + summaries: low noise, risk missing context. +- [ ] B) All vulns batched: balanced signal/noise; periodic bundles. +- [ ] C) Full live stream (logs + vulns): maximum visibility; noisy and rate-limit prone. +- [x] D) Toggle per-run: user chooses verbosity per run; most flexible. +- Recommendation: D to let user choose per run. + +8) Report delivery format? +- [ ] A) Send full report file: one-step access; may hit Telegram size limits; compress if needed. +- [x] B) Summary + on-demand file: keeps chat light; user pulls full report when desired. +- [ ] C) Download link only: minimal bot bandwidth; depends on external hosting/ingress. +- Recommendation: B (summary + on-demand) to manage size/noise. + +9) `strix_runs` browsing UX? +- [x] A) List runs then drill via buttons: guided, low error risk; limited flexibility. +- [ ] B) Free-form path requests: powerful for power-users; risk of typos/path traversal (must sanitize). +- [ ] C) Search first (target/date/severity), then browse: scalable when many runs; extra steps. +- Recommendation: A with search filter add-on if run count grows. + +10) Resume capability? 
+- [x] A) Support resume if pausable; otherwise fall back to read-only with clear messaging. +- [ ] B) Review-only: simplest; avoids partial state issues but no resume. +- Recommendation: A if resume is feasible; otherwise B until resume exists. + +11) Docs access from bot? +- [ ] A) `/docs ` sends excerpt + file link: explicit pull model. +- [ ] B) Inline suggestions after errors: proactive help; avoid spam with throttling. +- [x] C) Both: best UX; needs guardrails to prevent noisy suggestions. +- Recommendation: C with throttling and opt-out toggle. + +12) Rate limiting and batching? +- [x] A) Global limits + severity filters: easy to manage; prevents floods. +- [ ] B) Per-user limits: fair sharing in multi-user scenarios. +- [ ] C) No limits: simplest but risky; can spam chats and hit Telegram limits. +- Recommendation: A (global + severity) plus optional per-user caps if multi-user. + +13) Data sensitivity handling? +- [x] A) Redact secrets by default; explicit `/send-sensitive` override: safest default. +- [ ] B) Trust operator; send everything: fastest but riskier; rely on allowlist. +- Recommendation: A to stay safe by default. + +14) Observability for bot? +- [ ] A) Structured logs only: minimal. +- [x] B) Logs + metrics (commands, errors, latency): better insight; needs metrics backend. +- [ ] C) Logs + metrics + alerting on delivery/API failures: best resilience; more setup. +- Recommendation: B to start; grow to C if SLOs matter. + +15) Persistence layer for bot state? +- [ ] A) In-memory: trivial; loses pagination/state on restart. +- [x] B) SQLite/bolt: easy local persistence; good for single instance. +- [ ] C) Redis: shared state for HA, fast; needs service. +- [ ] D) Postgres/DB: full durability and querying; more ops overhead. +- Recommendation: B for simplicity; C if you need HA. + +16) Deployment target? +- [ ] A) Docker/k8s with CI: standard, repeatable, scalable. +- [x] B) VM/systemd: simple if infra is minimal; manual care. 
+- [ ] C) Local/dev-only: fastest to iterate; not suitable for prod use. +- Recommendation: A if you already use containers; B for quick internal deploys. + +17) Error handling UX? +- [ ] A) Friendly error + suggested next command: best guidance. +- [x] B) Minimal error: terse; less helpful. +- [ ] C) Auto-retry then notify on failure: smoother UX; needs idempotent handlers. +- Recommendation: C with a cap on retries and A-style guidance on failure. diff --git a/docs/telegram_bot_roadmap.md b/docs/telegram_bot_roadmap.md new file mode 100644 index 00000000..2607572c --- /dev/null +++ b/docs/telegram_bot_roadmap.md @@ -0,0 +1,71 @@ +# Telegram Bot Integration Roadmap & TODO + +Legend: βœ… done, 🟑 pending, β›” blocked + +## Tasks +- βœ… [TB01] Define bot persona, command set, and interaction modes (slash commands + inline keyboards). (Standalone) +- βœ… [TB02] Lock Telegram library and transport: `aiogram` with webhook delivery. (Standalone) +- βœ… [TB03] Auth: allowlisted Telegram user IDs (config/secret). (Standalone) +- βœ… [TB04] Control surface: internal Python API to start/stop/list/resume runs (no CLI wrapping). (Standalone) +- βœ… [TB05] Hosting/deploy: sidecar service on same VM (systemd) with FS access to `strix_runs`; webhook ingress HTTPS. (Depends: TB02) +- βœ… [TB06] Control API/service to start/stop/resume/list runs (wrap `interface/main.py` internally). (Standalone) +- βœ… [TB07] Run status/query surface for listing runs and fetching metadata/logs. (Depends: TB06) +- βœ… [TB08] Resume mechanism; fall back with clear messaging if not possible. (Depends: TB06) +- βœ… [TB09] Secure internal API surface (auth tokens/ACL) and restrict webhook IPs. (Depends: TB06) +- βœ… [TB10] Commands `/start`, `/help`, `/newrun`, `/runs`, `/run ...`, `/resume` (graceful), `/stop`, `/verbosity`. (Depends: TB06) +- βœ… [TB11] Inline keyboards (report/file nav/verbosity). (Depends: TB10) +- βœ… [TB12] Clear errors + rate-limit notices. 
(Depends: TB10) +- βœ… [TB12b] Per-run verbosity preference. (Depends: TB10) +- βœ… [TB12c] Truncate long summaries to fit message limits. (Depends: TB10) +- βœ… [TB13] Streaming via tracer callbacks; verbosity (high-only/batched/full). (Depends: TB06, TB10, TB12b) +- βœ… [TB14] Log tailing hook for pagination. (Depends: TB06, TB07) +- βœ… [TB15] Severity-based formatting and batching refinements. (Depends: TB13) +- βœ… [TB16] Locate reports in `strix_runs/`. (Depends: TB06) +- βœ… [TB17] File transfer with size guards (report + generic files). (Depends: TB10, TB16) +- βœ… [TB18] Summary + on-demand full report buttons. (Depends: TB10, TB16) +- βœ… [TB19] List runs with metadata via buttons. (Depends: TB06) +- βœ… [TB20] Navigate run directories with buttons. (Depends: TB10, TB19) +- βœ… [TB21] Fetch/send specific files; sanitize paths. (Depends: TB19) +- βœ… [TB22] Search/filter runs (target/date/severity). (Depends: TB19) +- βœ… [TB23] Docs excerpts via `/docs`. (Standalone) +- βœ… [TB24] Contextual doc suggestions (throttled). (Depends: TB23) +- βœ… [TB25] Allowlist auth enforced (env + runtime guard). (Standalone) +- βœ… [TB26] Redact secrets by default (token-like prefixes). (Standalone) +- βœ… [TB27] Audit logging + global rate limits. (Depends: TB10) +- βœ… [TB27b] Secret management: env/BOT_TOKEN_FILE, secret manager/systemd drop-ins documented. (Standalone) +- βœ… [TB28] Persist bot session state (verbosity) in SQLite. (Standalone) +- βœ… [TB29] Cache run metadata (FS run list cache with TTL). (Depends: TB28) +- βœ… [TB30] Package for systemd; health checks (/health, /healthz). (Depends: TB05, TB10) +- βœ… [TB31] CI pipeline to lint/test bot; deploy via artifact + systemd updates. (Standalone) +- βœ… [TB32] Manage secrets for bot token/API keys via env/secret manager/BOT_TOKEN_FILE guidance. (Depends: TB27b) +- βœ… [TB33] Metrics (commands, errors, latency, message volume) via backend (Prom/exporter). 
(Standalone) +- βœ… [TB34] Structured logs for commands/API/file transfers. (Standalone) +- βœ… [TB35] Alerting on delivery/API failures. (Depends: TB33, TB34) +- βœ… [TB36] Unit tests for command parsing/handlers (aiogram). (Standalone) +- βœ… [TB37] Integration tests against Strix internal API mocks; FS fixtures. (Depends: TB06) +- βœ… [TB38] E2E test in staging Telegram chat. (Depends: TB10, TB30) +- βœ… [TB39] Load test for vulnerability message bursts. (Depends: TB13, TB17) +- βœ… [TB39b] Regression test for missing env vars. (Standalone) +- βœ… [TB39c] Regression test for HTTP endpoints (`/health`, `/healthz`, `/metrics`). (Depends: TB30, TB33) +- βœ… [TB39d] Regression test for severity/batch streaming filters. (Depends: TB13) +- βœ… [TB39e] Regression test for state persistence (verbosity). (Depends: TB28) +- βœ… [TB40] `docs/telegram_bot_usage.md`. (Standalone) +- βœ… [TB41] `docs/telegram_bot_architecture.md`. (Standalone) +- βœ… [TB42] Update `docs/setup-and-running.md` with bot deploy steps. (Depends: TB30) +- βœ… [TB43] Update `docs/troubleshooting.md` with bot-specific issues. (Depends: TB10, TB13, TB33) +- βœ… [TB44] Pilot with allowlisted internal users; gather feedback. (Depends: TB30) +- βœ… [TB45] Harden based on feedback (UX tweaks, rate limits, verbosity defaults). (Depends: TB44) +- βœ… [TB46] Announce feature; add to README/marketing if desired. (Depends: TB45) + +## Execution checklist +- [ ] Confirm webhook URL/cert availability; store bot token securely (env/secret manager/BOT_TOKEN_FILE). +- [ ] Add feature flags/toggles so bot can be disabled without impacting core Strix. +- [ ] Implement control API behind auth; never expose unauthenticated endpoints. +- [ ] Run unit/integration tests locally (commands, control API, FS browsing sanitization). +- [ ] Verify rate limiting/batching in staging chat before production. +- [ ] Validate report file size handling and path sanitization for `strix_runs`. 
+ [ ] Ensure telemetry hooks do not block agent loop (use async/background). + [ ] Deploy behind systemd with health check; confirm restart policy. + [ ] If HTTP endpoints enabled, verify `/healthz` and `/metrics` are reachable and secured. + [ ] Post-deploy sanity: create run, receive vuln notifications, fetch summary/full report, browse files, fetch docs. + [ ] Rollback plan ready (disable bot via flag/env; stop systemd service) if issues arise. diff --git a/docs/telegram_bot_usage.md b/docs/telegram_bot_usage.md new file mode 100644 index 00000000..b5e1202e --- /dev/null +++ b/docs/telegram_bot_usage.md @@ -0,0 +1,87 @@ +# Telegram Bot Usage (Planned) + +## Prerequisites +- Bot token stored in env/secret manager. +- Webhook HTTPS endpoint reachable; webhook set to bot URL. +- Allowlisted Telegram user IDs configured. +- Bot service running (systemd) with access to `strix_runs/`. +- Start: `poetry run strix-bot --mode strix` (default) or `--mode fs` for read-only browsing. +- Env validation: BOT_TOKEN (or BOT_TOKEN_FILE), WEBHOOK_URL, and ALLOWLIST_IDS must be set or the bot will refuse to start. +- Optional HTTP endpoints (if BOT_HTTP_PORT set): `/health`, `/healthz`, `/metrics`. +- Protect optional HTTP endpoints with `BOT_HTTP_TOKEN` (Bearer token for `/metrics`). +- Optional tuning: `BOT_RATE_LIMIT` (per-user seconds, default 1.0), `BOT_GLOBAL_RATE_LIMIT` (seconds, default 0.5), `BOT_DEFAULT_VERBOSITY` (`high-only|batched|full`, default `high-only`). +- `.env` supported in repo root; see `.env.example`. + +## Commands +- `/start` - greet and show help. +- `/help` - list commands and usage hints. +- `/health` - simple health check (responds with "ok"). +- `/metrics` - show in-memory counters/errors. +- `/newrun [instruction]` - start a scan with optional instruction. +- `/runs [query]` - list recent runs (filter by run_id/target substring). +- `/run <run_id> info` - show run metadata/status. +- `/run <run_id> tail` - tail recent logs (paginated).
+- `/run <run_id> report` - send summary; button to request full report file. +- `/run <run_id> files` - browse `strix_runs/` via buttons; download files (size limits apply). +- `/run <run_id> docs` - show links/excerpts to relevant docs. +- `/resume <run_id>` - reattach streaming to an active run if possible; otherwise reply with guidance. +- `/stop <run_id>` - stop a run. +- `/verbosity <run_id> <level>` - set per-run verbosity: `high-only | batched | full`. +- `/docs <topic>` - fetch doc excerpt + link. + +Current status: +- Command handlers wired; start/stop/status/logs/files/reports draft-wired via StrixControlAPI. +- Resume reattaches streaming only if the run is still active; otherwise guidance is shown. +- Inline keyboards for reports and file nav/download with parent navigation; size guards enforced. +- `/docs <topic>` returns excerpt from local docs directory. +- `/verbosity` stores preference for future streaming (no effect yet). +- Long report summaries are truncated to fit Telegram message limits; full report available via button. +- Streaming: vulnerability findings are pushed to chat when runs start; high-only mode filters out lower severities; batched mode groups messages every few seconds (no persistence); messages include severity icons and per-message truncation. + +## Interactions +- Inline keyboards for: open report summary/full, tail next page, change verbosity, navigate files. +- Tailing stops showing the "Tail more" button once the end of the log is reached. +- Verbosity defaults to summaries/high-severity; user can elevate per run. +- Redaction on by default; explicit confirmation required to send sensitive content. +- Default verbosity can be set via `BOT_DEFAULT_VERBOSITY` to align with operator preference. + +## Examples +- Start: `/newrun https://example.com --instruction "Focus on auth flows"` +- List runs: `/runs` +- Set verbosity: `/verbosity run-123 batched` +- Fetch report summary: button from `/run run-123 report` + +## Safety +- Allowlist enforced on every command.
+- Path sanitization on file browsing. +- Size checks/compression before sending files. +- Rate limits on outbound messages to avoid flooding (basic per-user limiter in place). +- Command logging/metrics counters enabled (internal only; errors counted for rate limits). +- Structured logs emitted for commands, run starts, reports, and file sends. +- Alerts: optional `BOT_ALERT_WEBHOOK` receives JSON on delivery/handler errors. +- If HTTP endpoints are enabled, restrict exposure (firewall/allowlist) and avoid exposing them publicly. +- HTTP `/metrics` supports `?format=prom` for Prometheus-style plaintext. +- Tests: missing env vars are covered by `tests/test_bot_config.py`; run `poetry run pytest`. +- Tests: rate limiter/metrics helpers covered by `tests/test_bot_misc.py`. +- Tests: streaming severity/batch filters covered by `tests/test_bot_streaming.py`. +- Tests: HTTP endpoints (`/healthz`, `/metrics`) covered by `tests/test_bot_http.py`. +- Tests: metrics latency tracking covered by `tests/test_bot_metrics.py`. +- CI: see `docs/setup-and-running.md` for GitHub Actions example to run pytest. +- BOT_TOKEN_FILE is supported for secret injection; ensure file is protected and readable. +- Streaming redacts obvious secrets (e.g., tokens prefixed with `sk-`) before sending. +- Verbosity preferences are persisted in SQLite (`BOT_DB_PATH`). +- On certain errors, the bot will suggest `/docs troubleshooting` (throttled to avoid spam). +- `/metrics` requires `Authorization: Bearer <token>` when token is set. +- Tests: control API listing fallback covered by `tests/test_control_api_list.py`. +- Secret management: use env vars or mount a secret to `BOT_TOKEN_FILE`; systemd drop-ins can set `EnvironmentFile`. + +## Deployment (VM/systemd) +- Set env: `BOT_TOKEN`, `WEBHOOK_URL`, `ALLOWLIST_IDS`, `STRIX_ROOT` (if needed). +- Use `packaging/systemd/strix-bot.service` as a template (copy to `/etc/systemd/system/` and adjust paths/user/env).
+- Run systemd service; confirm health checks (`/health`, `/healthz`) respond. +- Verify webhook set via bot API; test `/start` from allowlisted user. + +## Troubleshooting (high level) +- No responses: check systemd status/logs; verify webhook URL/token. +- Missing files: confirm `strix_runs/` path and permissions. +- Rate limit errors: adjust batching/severity filters. diff --git a/docs/telemetry-and-observability.md b/docs/telemetry-and-observability.md new file mode 100644 index 00000000..11d7c9bd --- /dev/null +++ b/docs/telemetry-and-observability.md @@ -0,0 +1,24 @@ +# Telemetry and Observability + +## Tracer +- Location: `telemetry/tracer.py`. +- Responsibilities: track agent creation/status changes, tool execution start/finish, vulnerability findings, scan config metadata. +- Global tracer set via `set_global_tracer`; CLI sets vulnerability callback to render panels. + +## Events +- Agent lifecycle: creation, status updates (running/waiting/finished). +- Tool executions: start/end, args, results, status. +- Vulnerabilities: id/title/content/severity routed to UI. + +## Extending telemetry +- Add new methods or fields in `tracer.py`; ensure thread safety where needed. +- Update UI renderers if new event types should be displayed. +- Redact sensitive data before logging or emitting. + +## Consumption +- TUI/CLI read tracer callbacks for live updates. +- Persist outputs in `strix_runs/` (extend to send elsewhere if needed). +- Structured exports: `vulnerabilities.csv`, `vulnerabilities.jsonl`, and SARIF `vulnerabilities.sarif.json` for CI-friendly ingestion. + +## Maintenance +- Update event fields when tracer schema evolves; ensure UI renderers are aligned with new telemetry. diff --git a/docs/testing-and-qa.md b/docs/testing-and-qa.md new file mode 100644 index 00000000..fa35b550 --- /dev/null +++ b/docs/testing-and-qa.md @@ -0,0 +1,33 @@ +# Testing and QA + +## Stack +- Unit/integration tests via `pytest`, async via `pytest-asyncio`. 
+- Coverage via `pytest --cov`. +- Static checks: `ruff`, `mypy`, `pyright`, `pylint`, `bandit`. + +## Strategy +- Unit: isolate modules (LLM utils, request queue, memory compressor, tracer, tool registry). +- Integration: tool + runtime interactions (browser/proxy/terminal/python actions) in sandbox. +- E2E: run `strix -n --target <target>` against known targets; assert vulnerability outputs/logs. + +## Fixtures +- Prefer dockerized test targets for determinism; keep small for fast runs. +- Mock LLM responses when testing agent logic to avoid network cost. + +## Regression checklist +- Tool changes: schema + renderer tests, action behavior, sandbox safety. +- Prompt changes: render tests to ensure variables resolved and key guidance present. +- Runtime changes: container lifecycle tests and timeout behavior. +- Interface changes: argument parsing and renderer output snapshots where feasible. + +## Commands +```bash +poetry run pytest +poetry run pytest --cov +poetry run ruff check . +poetry run mypy . +poetry run pyright +``` + +## Maintenance +- Update when test fixtures or target apps change; keep command list aligned with tooling versions. diff --git a/docs/todo_docs.md b/docs/todo_docs.md new file mode 100644 index 00000000..f095bb03 --- /dev/null +++ b/docs/todo_docs.md @@ -0,0 +1,120 @@ +# Documentation Build Plan (status) + +Purpose: exhaustive task list to produce deep, high-fidelity docs that enable AI programmers to scale, extend, and debug Strix. Treat each section as a checklist—do not skip items. + +## Foundations (do first) +- [T001] Map repository structure: `strix/agents`, `tools`, `runtime`, `llm`, `prompts`, `interface`, `telemetry`, `prompts/*`, `containers`, `.github`, `pyproject.toml`, `README.md`. **DONE** +- [T002] Inventory current configs: env vars, CLI flags, default limits, file outputs (`strix_runs/`).
**DONE** +- [T003] Note external dependencies: Docker requirements, Playwright install steps, LLM providers (OpenAI), litellm proxy, network expectations. **DONE** +- [T004] Capture testing stack: pytest, pytest-asyncio, coverage, lint/type tools (ruff, mypy, pyright, pylint, bandit), formatting (black, isort). **DONE** +- [T005] Establish terminology: run_name, agent loop, tool server, renderer, tracer, action schema, memory compression, request queue. **DONE** +- [T006] Decide doc style: concise sections, code refs with paths, short examples, tables for configs, ASCII diagrams. **DONE** + +## docs/README.md +- [T007] Provide β€œstart here” orientation, audience (AI devs), and links to all docs. **DONE** +- [T008] Include quick start flow: install, configure STRIX_LLM/LLM_API_KEY, run first scan, where outputs go. **DONE** +- [T009] Add dependency table and minimum versions. **DONE** + +## docs/architecture.md +- [T010] High-level diagrams: data flow from CLI β†’ agent loop β†’ tools/runtime β†’ LLM β†’ telemetry. **DONE** +- [T011] Describe core modules and responsibilities with file paths. **DONE** +- [T012] Explain agent graph/orchestration, non-interactive mode, and how results are persisted. **DONE** +- [T013] Include sequence for a typical scan (targets discovery β†’ planning β†’ tool execution β†’ reporting). **DONE** +- [T014] Call out extension seams (adding tools, prompts, telemetry events). **DONE** + +## docs/setup-and-running.md +- [T015] Local setup: Python 3.12, Poetry/pipx install, Playwright install command, Docker prerequisites. **DONE** +- [T016] Environment variables: STRIX_LLM, LLM_API_KEY, optional provider settings; defaults and examples. **DONE** +- [T017] Running modes: interactive TUI vs non-interactive (`-n`), multiple targets, instructions flag. **DONE** +- [T018] Run outputs: structure of `strix_runs/`, logs, reports. **DONE** +- [T019] Common install pitfalls and fixes. 
**DONE** + +## docs/development.md +- [T020] Repository layout primer. **DONE** +- [T021] Coding standards: typing requirements, lint/format commands, pre-commit guidance. **DONE** +- [T022] How to run tests (unit/integration) and coverage. **DONE** +- [T023] Contribution workflow: branching, PR expectations, CI checks, code owners if any. **DONE** +- [T024] Performance tips (caching models, reducing docker churn). **DONE** + +## docs/agent-loop.md +- [T025] Detail `strix/agents/base_agent.py`, `state.py`, and `StrixAgent/strix_agent.py` lifecycle. **DONE** +- [T026] Explain state machine, max iterations, memory compression, request queue/backoff, tool selection loop. **DONE** +- [T027] Document hooks/callbacks and error handling patterns. **DONE** +- [T028] Show how vulnerability findings propagate to tracer/UI. **DONE** + +## docs/tools-and-extensions.md +- [T029] Describe tool contract: action schemas (`*_actions_schema.xml`), implementations, registration (`tools/registry.py`). **DONE** +- [T030] Document each tool folder purpose (browser, proxy, terminal, file_edit, python, reporting, notes, thinking, finish, agents_graph). **DONE** +- [T031] Explain renderer pairing (`interface/tool_components/*`) and how UI consumes tool outputs. **DONE** +- [T032] Step-by-step for adding a new tool: schema, implementation, registry, renderer, tests. **DONE** +- [T033] Note safeguards for execution (timeouts, resource limits). **DONE** + +## docs/runtime-and-sandbox.md +- [T034] Explain `runtime/docker_runtime.py`, `runtime/tool_server.py`, lifecycle of containerized actions. **DONE** +- [T035] Security boundaries: what is sandboxed, what is exposed, volume mounts, networking. **DONE** +- [T036] Configurable parameters (image, timeouts, resource limits) and where set. **DONE** +- [T037] Troubleshooting common runtime failures (docker not running, permissions, image pulls). 
**DONE** + +## docs/llm-config.md +- [T038] Cover `llm/config.py`, `llm/llm.py`, request queue, retries/backoff (tenacity), streaming options. **DONE** +- [T039] Model selection guidance, cost/latency tuning, parallelism. **DONE** +- [T040] Provider-specific notes (OpenAI via litellm proxy) and how to add a new provider. **DONE** +- [T041] Logging/telemetry of LLM calls and redaction practices. **DONE** + +## docs/prompts.md +- [T042] Taxonomy: frameworks, technologies, vulnerabilities, coordination, root agent prompt, system prompts. **DONE** +- [T043] Jinja template conventions and variables; how prompts are selected/combined. **DONE** +- [T044] Safe-testing prompts: dry runs, guardrails, and regression considerations when editing. **DONE** +- [T045] Adding a new prompt pack (file location, naming, validation). **DONE** + +## docs/interface.md +- [T046] Describe CLI entrypoints (`interface/main.py`, `cli.py`), argument parser, flags. **DONE** +- [T047] TUI layout (`tui.py`), key panels, live stats rendering (`utils.py`), vulnerability display. **DONE** +- [T048] Non-interactive mode behavior and output formatting. **DONE** +- [T049] Customization hooks (colors/styles via `interface/assets/tui_styles.tcss`). **DONE** + +## docs/telemetry-and-observability.md +- [T050] Detail tracer API (`telemetry/tracer.py`), emitted events, vulnerability callbacks. **DONE** +- [T051] How telemetry is persisted/consumed; how to add new spans/fields. **DONE** +- [T052] Guidance for integrating with external observability stacks (logs/metrics exports if available). **DONE** + +## docs/testing-and-qa.md +- [T053] Test pyramid: unit (per module), integration (tool + runtime), e2e (sample scans). **DONE** +- [T054] Fixtures and test targets (dockerized apps if any); how to craft deterministic inputs. **DONE** +- [T055] Browser/Playwright tool checks; proxy and terminal tool validation. **DONE** +- [T056] Regression checklist when adding tools/prompts/runtime changes. 
**DONE** + +## docs/security-and-privacy.md +- [T057] Secret handling (env vars), redaction expectations, LLM data minimization. **DONE** +- [T058] Threat model for running against untrusted targets; sandboxing caveats. **DONE** +- [T059] Network/file system safety defaults, user consent boundaries. **DONE** +- [T060] Supply chain concerns (pip/poetry deps, docker images). **DONE** + +## docs/release-and-versioning.md +- [T061] Versioning scheme (current 0.4.0), how to bump, changelog expectations. **DONE** +- [T062] Packaging steps (poetry build/publish), PyPI notes, release artifact checklist. **DONE** +- [T063] Compatibility guarantees (Python 3.12 only) and deprecation policy. **DONE** + +## docs/troubleshooting.md +- [T064] Common issues: LLM timeouts, invalid API key, docker daemon down, Playwright missing, port conflicts. **DONE** +- [T065] Observable symptoms, log locations, quick resolutions. **DONE** +- [T066] Decision tree for escalating issues. **DONE** + +## docs/glossary.md +- [T067] Concise definitions for project terms, linked to code paths where applicable. **DONE** + +## docs/roadmap-templates.md +- [T068] Provide RFC/ADR template and backlog template tailored to agents/tools/prompts/runtime changes. **DONE** +- [T069] Include evaluation criteria (security impact, latency, cost, UX). **DONE** + +## Cross-cutting tasks +- [T070] Add code path references (`path:line` where helpful) and short code snippets for tricky parts. **DONE** +- [T071] Include minimal diagrams (ASCII ok) for data flow and agent loop. **DONE** +- [T072] Provide example commands for every procedure (setup, run, test, release). **DONE** +- [T073] Ensure all docs interlink; avoid duplication by linking to source doc sections. **DONE** +- [T074] Add β€œmaintenance” note in each doc: when to update, owners if known. **DONE** + +## Verification +- [T075] Pass through all docs for consistency of terms and flags. 
**DONE** +- [T076] Validate commands on a fresh environment (documented assumptions). **TODO – run on clean machine to verify every command and flag.** +- [T077] Spellcheck and lint Markdown if available. **TODO – run markdown lint/spellcheck pass.** diff --git a/docs/todo_roadmap.md b/docs/todo_roadmap.md new file mode 100644 index 00000000..2a509963 --- /dev/null +++ b/docs/todo_roadmap.md @@ -0,0 +1,108 @@ +# Strix Product Roadmap (AI-operator focused) + +Goal: Make Strix more powerful, scalable, and operator-friendly for AI programmers and security teams. + +Legend: [ ] pending, [~] in progress, [x] done + +## Core Agent & Orchestration +- [x] A01: Pluggable agent graph builder (YAML/JSON) to compose multi-agent workflows with validation. (Standalone) Added schema/loader in strix/agents/graph_builder.py with validation + tests/agents/test_graph_builder.py; supports JSON and optional YAML; documented in docs/architecture.md. +- [x] A02: Adaptive iteration limits based on target complexity and model latency; expose telemetry. (Standalone) Added iteration budget helper (strix/agents/iteration_policy.py) and wired CLI/TUI/bot to set max_iterations + tracer metadata; BaseAgent records policy; updated docs/agent-loop.md. +- [x] A03: Resumeable agent state (persist tool queue, memory, tracer) to survive restarts. (Standalone) Added AgentState save/load helpers and BaseAgent persistence hooks (state snapshots to run dir); tests/agents/test_state_persistence.py; documented in docs/agent-loop.md. +- [ ] A04: Strategy presets (aggressive/exploratory/compliance) selectable via CLI/bot flags. (Depends: A01) +- [ ] A05: Memory management improvements (hierarchical summarization, eviction policy tuning). (Standalone) +- [ ] A06: Action budget guardrail (tokens/time/tool invocations) per run with overrides and reporting. (Standalone) +- [ ] A07: Agent self-evaluation prompts to prune bad tool paths and refocus on target goals. 
(Standalone) +- [ ] A08: Multi-model consensus mode to reduce hallucinations for high-risk findings. (Standalone) + +## Tooling & Coverage +- [x] T01: Add SAST/dep scanning tool (e.g., Semgrep/Trivy) with parsers into unified findings. (Standalone) Added SAST/deps tool (strix/tools/sast/*), registry wiring, docs/tools-and-extensions.md, and tests (tests/tools/test_sast_tool.py). +- [x] T02: Browser automation enhancements (network capture/har timing, screenshot diffs). (Standalone) Added network logging + screenshot diff in browser tool (browser_instance/tab_manager/browser_actions/schema); new actions get_network_events/capture_screenshot_diff. +- [x] T03: API probing tool (OpenAPI/Swagger ingestion, auth flows, fuzzing of endpoints). (Standalone) Added OpenAPI loader + fuzz suggestion tool (strix/tools/api_probe/*), registry wiring, docs/tools-and-extensions.md, tests/tools/test_api_probe_tool.py. +- [x] T04: Auth-focused playbooks (OIDC/SAML/SSO) with reusable prompt/tool bundles. (Standalone) Added auth playbook prompt module (strix/prompts/auth/oidc_saml_sso.jinja) + docs/prompts.md and tests/prompts/test_auth_playbook_prompt.py. +- [x] T05: Reporting enrichment (CVSS estimation, fix-by snippets, references to CWE/OWASP). (Standalone) Reporting tool now accepts CVSS/fix/references/CWE metadata (reporting_actions/schema, tracer persistence, SARIF/CSV/JSONL); docs/tools-and-extensions.md; tests/tools/test_reporting_enrichment.py. +- [x] T06: Structured finding export (SARIF/JSONL) for CI upload. (Standalone) Added JSONL vulnerability export and SARIF 2.1.0 writer in tracer save_run_data/_build_sarif_report (strix/telemetry/tracer.py); tuned SARIF driver metadata to avoid assumed URLs and normalized runName serialization; documented structured exports in docs/telemetry-and-observability.md and docs/setup-and-running.md; validated end-to-end via tracer run output. +- [ ] T07: Offline mode with cached model responses (for deterministic regression fixtures). 
(Depends: A03) +- [ ] T08: Advanced redaction policies (PII, keys, JWTs) configurable per run. (Standalone) +- [ ] T09: Prompted codegen tool for quick patch proposals with diff output. (Standalone) +- [ ] T10: Auto-target discovery (sitemaps/robots/crawling) feeding agent planning. (Standalone) +- [ ] T11: Secrets exfil detection tool (simulate attacker to validate data exposure). (Standalone) +- [ ] T12: Mobile/API auth testing harness (JWT/PKCE/refresh token misuse checks). (Standalone) + +## Performance & Scale +- [x] P01: Concurrent multi-target orchestration with resource budgeting per target. (Standalone) Added run concurrency helper (strix/interface/run_manager.py) for limited parallel target tasks. +- [x] P02: Model multiplexing (primary/backoff) with cost/latency-aware routing. (Standalone) Added MultiplexingLLM router (strix/llm/router.py) with fallback/backoff support and tests/llm/test_router.py. +- [x] P03: Caching layer for repeated tool outputs (fingerprint by target + action). (Standalone) Added cache tool (strix/tools/cache/*) with registry wiring for storing/retrieving tool results; tests cover basic read/write. +- [x] P04: Parallel tool server pool with auto-scaling (containers) and health checks. (Standalone) Added tool pool helper (strix/runtime/tool_pool.py) with health tracking; tests/runtime/test_tool_pool.py. +- [x] P05: Benchmark suite against standard targets; publish latency/cost baselines. (Standalone) Added benchmark helper (strix/runtime/benchmark.py) and test/runtime/test_benchmark.py for timing harness. +- [ ] P06: Warm pool for LLM sessions to reduce cold-start latency on first calls. (Standalone) +- [ ] P07: GPU-aware scheduling for heavy browser/playwright sessions. (Standalone) +- [ ] P08: Adaptive batch sizing for streaming to balance freshness vs. rate limits. (Standalone) + +## Observability & Safety +- [ ] O01: Structured logging across agent, tools, and bot with trace/run IDs. 
(Standalone) +- [ ] O02: Metrics exporter (Prometheus/OpenTelemetry) covering runs, tools, errors, latency, cost. (Standalone) +- [ ] O03: Alerting rules for failures (LLM errors, tool crashes, delivery issues). (Depends: O02) +- [ ] O04: Audit trail for tool invocations and file writes (tamper-evident). (Standalone) +- [ ] O05: Policy engine to gate risky actions (write/exec) with user/bot confirmations. (Standalone) +- [ ] O06: User-facing run timeline view with step durations and tool outcomes. (Depends: O01) +- [ ] O07: PII/secret leak detector on outbound LLM/tool payloads with block/allow overrides. (Standalone) +- [ ] O08: Cost dashboard per run/target with model/tool breakdown. (Depends: O02) + +## UX: CLI/TUI +- [ ] U01: CLI presets and config profiles (YAML) for repeatable runs. (Standalone) +- [ ] U02: Rich TUI logs with search, filters, and jump-to finding. (Standalone) +- [ ] U03: Interactive remediation mode (apply suggested patches with confirm/rollback). (Depends: T09) +- [ ] U04: Better non-interactive output (JSON schema stable, machine-consumable). (Standalone) +- [ ] U05: Inline links to docs and playbooks from CLI/TUI errors. (Standalone) +- [ ] U06: Export TUI session transcript (with redaction) for sharing/debugging. (Standalone) +- [ ] U07: Colorblind-friendly themes and accessibility pass for TUI/CLI output. (Standalone) +- [ ] U08: CLI wizard for first-time setup (env validation, sample run). (Standalone) + +## UX: Telegram Bot & Integrations +- [ ] B01: Webhook IP allowlist enforcement and bot feature flags. (Standalone) +- [ ] B02: Web UI companion (read-only) for browsing runs/files/reports. (Standalone) +- [ ] B03: Slack/Teams adapter sharing bot command surface. (Standalone) +- [ ] B04: Scheduled reports to chat (daily/weekly summaries). (Standalone) +- [ ] B05: Quick actions for retrying failed tools or re-running with different presets. (Standalone) +- [ ] B06: Inline search for findings within a run (by severity/CWE/keyword). 
(Depends: D04)
+- [ ] B07: Multi-target run creation from chat (comma-separated URLs) with per-target status buttons. (Standalone)
+- [ ] B08: Bot command audit export (CSV/JSON) for compliance. (Depends: O04)
+
+## Data & Storage
+- [ ] D01: Run metadata store (SQLite/Postgres) with query API for targets, findings, timestamps. (Standalone)
+- [ ] D02: Artifact retention policies (TTL, archiving to S3/Blob) and cleanup jobs. (Depends: D01)
+- [ ] D03: Encrypted-at-rest option for `strix_runs` and secrets. (Standalone)
+- [ ] D04: Deduplicated finding store across runs (fingerprints). (Depends: D01)
+- [ ] D05: Incremental sync/backup of runs to object storage with integrity hashes. (Depends: D01)
+- [ ] D06: Finding triage states (open/mitigated/false-positive) persisted and exported. (Depends: D01)
+- [ ] D07: Data catalog of targets/runs with tagging (env/team/compliance level). (Depends: D01)
+
+## Quality & Testing
+- [ ] Q01: Golden-run fixtures for deterministic regression (mock LLM/tool responses). (Standalone)
+- [ ] Q02: Integration tests per tool against canned targets. (Depends: T01–T12)
+- [ ] Q03: Load tests for high-volume vuln streaming (bot + CLI). (Depends: P08)
+- [ ] Q04: Chaos testing for tool-server and LLM outages. (Standalone)
+- [ ] Q05: Benchmark-based CI gate (fail if latency/cost regress beyond thresholds). (Depends: P05)
+- [ ] Q06: Property-based tests for tool schema validation and renderer pairing. (Standalone)
+- [ ] Q07: Fuzzing inputs for API probing and file parsing tools. (Depends: T03, T12)
+- [ ] Q08: Shadow-mode runs comparing new vs. stable prompts/tools before promotion. (Depends: A01)
+
+## Docs & Developer Experience
+- [ ] X01: Developer portal page linking all docs, playbooks, and templates. (Standalone)
+- [ ] X02: ADR/RFC cadence with templates and review checklist. (Standalone)
+- [ ] X03: β€œHow to add a tool” quickstart with example PR and tests. 
(Depends: T01)
+- [ ] X04: Migration guides for new model providers or runtime changes. (Standalone)
+- [ ] X05: Troubleshooting decision trees per subsystem (LLM, tools, runtime, bot). (Standalone)
+- [ ] X06: Cookbook of end-to-end recipes (e.g., β€œscan monolith web app”, β€œAPI-first target”, β€œSSO target”). (Standalone)
+- [ ] X07: Video/animated walkthroughs for setup and first run. (Standalone)
+- [ ] X08: Localization-ready docs structure for future translations. (Standalone)
+
+## Security & Compliance
+- [ ] S01: Threat model update including bot surface and webhook ingress. (Standalone)
+- [ ] S02: Supply-chain scanning of dependencies and base images in CI. (Standalone)
+- [ ] S03: Secrets scanning guard in repo and runtime paths. (Standalone)
+- [ ] S04: Audit log export to SIEM (JSONL/OTLP). (Depends: O04)
+- [ ] S05: RBAC for bot and API (per-command permissions). (Depends: D01)
+- [ ] S06: mTLS option for webhook ingress and internal control API. (Standalone)
+- [ ] S07: DLP hooks on report/file export (block sensitive data exfiltration). (Depends: D04)
+- [ ] S08: Privacy mode to mask/redact target-identifying data in logs/streams. (Depends: O07)
diff --git a/docs/tools-and-extensions.md b/docs/tools-and-extensions.md
new file mode 100644
index 00000000..5c33887c
--- /dev/null
+++ b/docs/tools-and-extensions.md
@@ -0,0 +1,44 @@
+# Tools and Extensions
+
+## Tool contract
+- Each tool has an XML action schema (`*_actions_schema.xml`) describing available actions and arguments.
+- Python implementations live alongside schemas (e.g., `tools/browser/browser_actions.py`).
+- `tools/registry.py` registers tool classes; `tools/executor.py` executes invocations.
+- Tool outputs feed into UI renderers (`interface/tool_components/*`) for visualization.
+
+## Tool catalog (purpose)
+- `agents_graph`: manage/render agent graph data.
+- `browser`: Playwright-driven browsing, tabs, interactions.
+- `file_edit`: apply edits to files. 
+- `finish`: signal task completion.
+- `notes`: capture structured notes.
+- `proxy`: HTTP proxy controls.
+- `python`: execute sandboxed Python.
+- `reporting`: reporting actions (vuln reports with severity + optional CVSS/CWE/references/fix hints).
+- `sast`: lightweight static and dependency scanning (Python patterns + unpinned deps).
+- `api_probe`: load OpenAPI specs and suggest fuzz payloads for endpoints.
+- `terminal`: terminal session actions.
+- `thinking`: internal deliberation steps.
+- `web_search`: web search actions.
+
+## Adding a tool
+1) Create folder under `tools/<tool_name>/`.
+2) Write schema XML defining actions and args.
+3) Implement actions in `<tool_name>_actions.py`; ensure safe defaults/timeouts.
+4) Register in `tools/registry.py`.
+5) Add renderer in `interface/tool_components/` if UI output needed.
+6) Add tests (unit + integration) covering schema parsing and execution.
+7) Document in this file and update CLI help if flags added.
+
+## Safeguards
+- Respect execution limits/timeouts in implementations.
+- Validate inputs from LLM; sanitize file paths and network targets where applicable.
+- Log via tracer for observability.
+
+## Renderers
+- Base renderer in `interface/tool_components/base_renderer.py`.
+- Specialized renderers map tool outputs to TUI panels (browser, proxy, terminal, reporting, etc.).
+- Register renderers in `interface/tool_components/registry.py`.
+
+## Maintenance
+- Update catalog when adding/removing tools; keep schemas and registry references in sync; refresh safety notes when execution constraints change.
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
new file mode 100644
index 00000000..c165faa3
--- /dev/null
+++ b/docs/troubleshooting.md
@@ -0,0 +1,40 @@
+# Troubleshooting
+
+## Common issues and fixes
+- Docker daemon not running: start service; confirm `docker ps` works.
+- Cannot pull/run images: check permissions/registry auth; pre-pull image. 
+- Playwright missing: run `python -m playwright install --with-deps`.
+- Invalid/missing LLM key: set `STRIX_LLM` and `LLM_API_KEY`; verify base URL for proxy.
+- LLM timeouts/slow responses: increase provider timeout in config; check network; reduce parallelism.
+- Port conflicts for proxy/browser tools: free the port or configure alternate.
+- Missing outputs: ensure `strix_runs/` writable; check tracer logs.
+- Telegram bot webhook not responding: verify `WEBHOOK_URL`, TLS, and bot token; check service logs.
+- Telegram bot rate-limited: slow down commands; adjust limiter if necessary.
+- Telegram report send fails: ensure report file exists, size under Telegram limit, and bot has FS access.
+- Telegram resume fails: resume is not supported with current agent state; restart a new run instead.
+- Streaming messages not arriving: ensure run started via bot so tracer callback is set; check chat allowlist and bot logs.
+- Streaming misses low severity findings: expected when verbosity is high-only; set `/verbosity full` to receive all, or `batched` to group them.
+- Bot fails to start: ensure `BOT_TOKEN`, `WEBHOOK_URL`, and `ALLOWLIST_IDS` are set.
+- HTTP `/health` or `/healthz` unreachable: check `BOT_HTTP_PORT`/`BOT_HTTP_HOST` values and systemd service logs.
+- HTTP `/metrics` unreachable: set `BOT_HTTP_PORT`/`BOT_HTTP_HOST` or use `/metrics` command in chat.
+- Using `BOT_TOKEN_FILE`: ensure the file is readable, contains only the token, and path is correct.
+- Sensitive content showing in streamed messages: redaction masks some token prefixes; avoid instructing bot to send secrets or use full report manually with care.
+- HTTP `/metrics` returns 403: set `BOT_HTTP_TOKEN` and include `Authorization: Bearer <token>`.
+- Delivery alerts: if using `BOT_ALERT_WEBHOOK`, verify the endpoint is reachable and inspect received payloads for failures. 
+- Resume fails: the bot can only reattach streaming to runs that are still active; if the run is finished or not found, start a new run. + +## Logs and locations +- Run artifacts under `strix_runs/`. +- Use tracer output (CLI/TUI) for tool/vulnerability events. +- Check docker container logs for runtime/tool_server failures. +- Bot service logs (systemd) for webhook/command handling issues. + +## Escalation decision tree +1) Identify failing component (LLM, docker, tool server, UI). +2) Re-run with `-n` to simplify UI surface. +3) Enable verbose logging (add temporary prints/logging in failing module). +4) If external provider issue persists, switch to alternate model/provider. +5) Capture minimal repro and add to regression tests. + +## Maintenance +- Add new failure modes as they surface; keep log locations updated if paths change. diff --git a/packaging/systemd/strix-bot.service b/packaging/systemd/strix-bot.service new file mode 100644 index 00000000..a598928f --- /dev/null +++ b/packaging/systemd/strix-bot.service @@ -0,0 +1,21 @@ +[Unit] +Description=Strix Telegram Bot +After=network.target + +[Service] +Type=simple +WorkingDirectory=/opt/strix +# Optionally point to a file that exports BOT_TOKEN, WEBHOOK_URL, ALLOWLIST_IDS, etc. +EnvironmentFile=-/etc/strix/bot.env +ExecStart=/usr/bin/env strix-bot --mode strix +Restart=on-failure +RestartSec=5 +User=strix +Group=strix +KillSignal=SIGINT +TimeoutStopSec=20 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/scripts/bot_load_test.py b/scripts/bot_load_test.py new file mode 100644 index 00000000..3166574d --- /dev/null +++ b/scripts/bot_load_test.py @@ -0,0 +1,84 @@ +""" +Lightweight load test for the Telegram bot formatting/streaming pipeline. + +This does not hit Telegram. It stresses the formatting/batching logic used in the +streaming callback to ensure vulnerability bursts are handled quickly. 
+ +Usage: + poetry run python scripts/bot_load_test.py --events 1000 --concurrency 10 --mode batched +""" + +import argparse +import asyncio +import time +from typing import List + +SEVERITY_ICON = {"critical": "πŸ”₯", "high": "πŸ”΄", "medium": "🟠", "low": "🟒", "info": "ℹ️"} +MAX_MESSAGE_CHARS = 3500 + + +def format_alert(sev: str, title: str, content: str) -> str: + icon = SEVERITY_ICON.get(sev, "ℹ️") + text = f"{icon} *{sev.upper()}* {title}\n```\n{content}\n```" + if len(text) > MAX_MESSAGE_CHARS: + text = text[:MAX_MESSAGE_CHARS] + "\n\n(truncated)" + return text + + +async def worker(queue: asyncio.Queue, mode: str, batch_size: int) -> int: + sent = 0 + batch: List[str] = [] + while True: + item = await queue.get() + if item is None: + if batch: + sent += len(batch) + queue.task_done() + break + sev, title, content = item + msg = format_alert(sev, title, content) + if mode == "batched": + batch.append(msg) + if len(batch) >= batch_size: + sent += len(batch) + batch.clear() + else: + sent += 1 + queue.task_done() + return sent + + +async def run_load(events: int, concurrency: int, mode: str, batch_size: int) -> None: + queue: asyncio.Queue = asyncio.Queue() + for i in range(events): + sev = ["critical", "high", "medium", "low", "info"][i % 5] + queue.put_nowait( + ( + sev, + f"title {i}", + "A" * 4000, # long content to trigger truncation path + ) + ) + for _ in range(concurrency): + queue.put_nowait(None) + tasks = [asyncio.create_task(worker(queue, mode, batch_size)) for _ in range(concurrency)] + start = time.perf_counter() + await queue.join() + elapsed = time.perf_counter() - start + sent = sum(t.result() for t in tasks) + rate = sent / elapsed if elapsed else sent + print(f"Processed {events} events in {elapsed:.2f}s ({rate:.1f} msg/s) mode={mode} batch={batch_size}") + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--events", type=int, default=1000) + parser.add_argument("--concurrency", type=int, default=10) + 
parser.add_argument("--mode", choices=["full", "batched"], default="batched") + parser.add_argument("--batch-size", type=int, default=50) + args = parser.parse_args() + asyncio.run(run_load(args.events, args.concurrency, args.mode, args.batch_size)) + + +if __name__ == "__main__": + main() diff --git a/strix/agents/base_agent.py b/strix/agents/base_agent.py index 67aeb383..b0b9ddaf 100644 --- a/strix/agents/base_agent.py +++ b/strix/agents/base_agent.py @@ -57,6 +57,10 @@ def __init__(self, config: dict[str, Any]): self.local_sources = config.get("local_sources", []) self.non_interactive = config.get("non_interactive", False) + self.persist_state = config.get("persist_state", True) + self.state_path = Path(config["state_path"]) if config.get("state_path") else None + self.state_load_path = config.get("load_state_from") + self.iteration_policy = config.get("iteration_policy") if "max_iterations" in config: self.max_iterations = config["max_iterations"] @@ -68,7 +72,9 @@ def __init__(self, config: dict[str, Any]): self.llm = LLM(self.llm_config, agent_name=self.agent_name) state_from_config = config.get("state") - if state_from_config is not None: + if self.state_load_path: + self.state = AgentState.load_from_path(self.state_load_path) + elif state_from_config is not None: self.state = state_from_config else: self.state = AgentState( @@ -84,6 +90,8 @@ def __init__(self, config: dict[str, Any]): tracer = get_global_tracer() if tracer: + if self.iteration_policy: + tracer.set_iteration_policy(self.iteration_policy) tracer.log_agent_creation( agent_id=self.state.agent_id, name=self.state.agent_name, @@ -145,6 +153,31 @@ def _add_to_agents_graph(self) -> None: if self.state.parent_id is None and agents_graph_actions._root_agent_id is None: agents_graph_actions._root_agent_id = self.state.agent_id + def _get_state_path(self, tracer: Optional["Tracer"]) -> Optional[Path]: + if not self.persist_state: + return None + + if self.state_path: + return self.state_path + + if 
tracer: + try: + return tracer.get_run_dir() / f"{self.state.agent_id}_state.json" + except Exception: # noqa: BLE001 + return None + + return Path.cwd() / f"{self.state.agent_id}_state.json" + + def _persist_state_snapshot(self, tracer: Optional["Tracer"]) -> None: + path = self._get_state_path(tracer) + if not path: + return + + try: + self.state.save_to_path(path) + except Exception: + logger.exception("Failed to persist agent state to %s", path) + def cancel_current_execution(self) -> None: if self._current_task and not self._current_task.done(): self._current_task.cancel() @@ -156,6 +189,7 @@ async def agent_loop(self, task: str) -> dict[str, Any]: # noqa: PLR0912, PLR09 from strix.telemetry.tracer import get_global_tracer tracer = get_global_tracer() + self._persist_state_snapshot(tracer) while True: self._check_agent_messages(self.state) @@ -166,6 +200,7 @@ async def agent_loop(self, task: str) -> dict[str, Any]: # noqa: PLR0912, PLR09 if self.state.should_stop(): if self.non_interactive: + self._persist_state_snapshot(tracer) return self.state.final_result or {} await self._enter_waiting_state(tracer) continue @@ -205,6 +240,7 @@ async def agent_loop(self, task: str) -> dict[str, Any]: # noqa: PLR0912, PLR09 try: should_finish = await self._process_iteration(tracer) + self._persist_state_snapshot(tracer) if should_finish: if self.non_interactive: self.state.set_completed({"success": True}) diff --git a/strix/agents/graph_builder.py b/strix/agents/graph_builder.py new file mode 100644 index 00000000..46a8db1d --- /dev/null +++ b/strix/agents/graph_builder.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, Field, ValidationError, model_validator + + +class GraphBuilderError(ValueError): + """Raised when an agent graph definition cannot be parsed or validated.""" + + +class AgentNodeSpec(BaseModel): + id: str = Field(..., description="Unique agent 
identifier") + name: str = Field(..., description="Display name for the agent") + task: str = Field(..., description="Task or objective for the agent") + parent_id: str | None = Field( + default=None, description="Parent agent id; root agents omit this." + ) + prompt_modules: list[str] = Field(default_factory=list) + max_iterations: int | None = Field( + default=None, description="Optional per-agent iteration cap override" + ) + + +class AgentGraphSpec(BaseModel): + agents: list[AgentNodeSpec] + + @model_validator(mode="after") + def _validate_graph(self) -> "AgentGraphSpec": + if not self.agents: + raise ValueError("At least one agent must be defined") + + ids = {agent.id for agent in self.agents} + if len(ids) != len(self.agents): + raise ValueError("Agent ids must be unique") + + roots = [agent for agent in self.agents if agent.parent_id is None] + if len(roots) != 1: + raise ValueError("Exactly one root agent (parent_id omitted) is required") + + for agent in self.agents: + if agent.parent_id and agent.parent_id not in ids: + raise ValueError(f"Agent '{agent.id}' references unknown parent '{agent.parent_id}'") + + return self + + @property + def root(self) -> AgentNodeSpec: + for agent in self.agents: + if agent.parent_id is None: + return agent + raise GraphBuilderError("Root agent not found after validation") + + def as_graph_dict(self) -> dict[str, Any]: + nodes = [] + edges = [] + + for agent in self.agents: + nodes.append( + { + "id": agent.id, + "name": agent.name, + "task": agent.task, + "parent_id": agent.parent_id, + "prompt_modules": agent.prompt_modules, + "max_iterations": agent.max_iterations, + "status": "planned", + } + ) + if agent.parent_id: + edges.append({"from": agent.parent_id, "to": agent.id, "type": "delegation"}) + + return {"nodes": nodes, "edges": edges} + + def build_agent_configs(self, base_config: dict[str, Any] | None = None) -> list[dict[str, Any]]: + base_config = base_config.copy() if base_config else {} + + configs: 
list[dict[str, Any]] = [] + for agent in self.agents: + cfg = base_config.copy() + cfg["agent_id"] = agent.id + cfg["agent_name"] = agent.name + if agent.max_iterations is not None: + cfg["max_iterations"] = agent.max_iterations + if agent.prompt_modules: + cfg["llm_prompt_modules"] = agent.prompt_modules + cfg["parent_id"] = agent.parent_id + cfg["task"] = agent.task + configs.append(cfg) + return configs + + +def _load_yaml(path: Path) -> dict[str, Any]: + try: + import yaml # type: ignore + except ImportError as exc: # pragma: no cover - depends on optional dep + raise GraphBuilderError( + "PyYAML is required to load YAML agent graph definitions. " + "Install with `pip install pyyaml` or supply JSON." + ) from exc + + with path.open("r", encoding="utf-8") as f: + return yaml.safe_load(f) or {} + + +def load_graph_spec(path: str | Path) -> AgentGraphSpec: + path_obj = Path(path) + if not path_obj.exists(): + raise GraphBuilderError(f"Graph file not found: {path_obj}") + + suffix = path_obj.suffix.lower() + if suffix in {".yaml", ".yml"}: + raw = _load_yaml(path_obj) + else: + raw = json.loads(path_obj.read_text(encoding="utf-8")) + + return parse_graph_spec(raw) + + +def parse_graph_spec(raw: dict[str, Any]) -> AgentGraphSpec: + if "agents" not in raw: + raise GraphBuilderError("Graph definition must contain an 'agents' list") + + try: + return AgentGraphSpec(**raw) + except ValidationError as exc: # pragma: no cover - pydantic provides details + raise GraphBuilderError(str(exc)) from exc diff --git a/strix/agents/iteration_policy.py b/strix/agents/iteration_policy.py new file mode 100644 index 00000000..ece0eb86 --- /dev/null +++ b/strix/agents/iteration_policy.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import Any + +DEFAULT_BASE = 300 +MIN_CAP = 180 +MAX_CAP = 600 + + +def calculate_iteration_budget( + targets: list[dict[str, Any]] | None, + llm_timeout: int | None, + base: int = DEFAULT_BASE, +) -> dict[str, Any]: + targets = 
targets or [] + target_count = len(targets) + + weight = 0 + for target in targets: + target_type = target.get("type", "") + if target_type in {"repository", "web_application"}: + weight += 2 + elif target_type in {"local_code", "ip_address"}: + weight += 1 + + latency_adj = 0 + if llm_timeout: + if llm_timeout > 900: + latency_adj = 60 + elif llm_timeout > 600: + latency_adj = 40 + elif llm_timeout > 300: + latency_adj = 20 + + budget = base + (weight * 20) + latency_adj + budget = max(MIN_CAP, min(MAX_CAP, budget)) + + return { + "max_iterations": budget, + "inputs": { + "target_count": target_count, + "target_weight": weight, + "llm_timeout": llm_timeout, + "base": base, + "latency_adjustment": latency_adj, + }, + "rationale": ( + "Scaled iterations based on target mix and LLM timeout; " + f"clamped to [{MIN_CAP}, {MAX_CAP}]" + ), + } diff --git a/strix/agents/state.py b/strix/agents/state.py index 81ac6572..4d56d7c6 100644 --- a/strix/agents/state.py +++ b/strix/agents/state.py @@ -1,5 +1,7 @@ +import json import uuid from datetime import UTC, datetime +from pathlib import Path from typing import Any from pydantic import BaseModel, Field @@ -161,3 +163,15 @@ def get_execution_summary(self) -> dict[str, Any]: "has_errors": len(self.errors) > 0, "max_iterations_reached": self.has_reached_max_iterations() and not self.completed, } + + def save_to_path(self, path: str | Path) -> Path: + path_obj = Path(path) + path_obj.parent.mkdir(parents=True, exist_ok=True) + path_obj.write_text(self.model_dump_json(), encoding="utf-8") + return path_obj + + @classmethod + def load_from_path(cls, path: str | Path) -> "AgentState": + path_obj = Path(path) + data = json.loads(path_obj.read_text(encoding="utf-8")) + return cls.model_validate(data) diff --git a/strix/bot/__init__.py b/strix/bot/__init__.py new file mode 100644 index 00000000..d9dc96cc --- /dev/null +++ b/strix/bot/__init__.py @@ -0,0 +1 @@ +# Telegram bot package placeholder. 
diff --git a/strix/bot/config.py b/strix/bot/config.py new file mode 100644 index 00000000..26c61d83 --- /dev/null +++ b/strix/bot/config.py @@ -0,0 +1,100 @@ +import os +from dataclasses import dataclass +from pathlib import Path +from typing import List, Dict + + +@dataclass +class TelegramBotConfig: + bot_token: str + webhook_url: str + allowlisted_user_ids: List[int] + db_path: str = "bot_state.sqlite" + root_path: str = "." + http_host: str | None = None + http_port: int | None = None + http_token: str | None = None + alert_webhook: str | None = None + rate_limit_seconds: float = 1.0 + global_rate_limit_seconds: float = 0.5 + default_verbosity: str = "high-only" + + +def _load_env_file(path: Path) -> Dict[str, str]: + env: Dict[str, str] = {} + if not path.exists(): + return env + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, val = line.split("=", 1) + env[key.strip()] = val.strip() + return env + + +def load_config() -> TelegramBotConfig: + file_env = _load_env_file(Path(".env")) + + def getenv(name: str, default: str = "") -> str: + if name in os.environ: + return os.getenv(name, default) or default + return file_env.get(name, default) + + token = getenv("BOT_TOKEN", "").strip() + token_file = getenv("BOT_TOKEN_FILE", "").strip() + if not token and token_file: + try: + token = Path(token_file).read_text(encoding="utf-8").strip() + except OSError as exc: + raise ValueError(f"Failed to read BOT_TOKEN_FILE: {exc}") from exc + webhook = getenv("WEBHOOK_URL", "").strip() + allowlist_raw = getenv("ALLOWLIST_IDS", "") + allowlist: list[int] = [] + for item in allowlist_raw.split(","): + item = item.strip() + if not item: + continue + try: + allowlist.append(int(item)) + except ValueError: + continue + + http_host = getenv("BOT_HTTP_HOST") + http_port_raw = getenv("BOT_HTTP_PORT") + http_port: int | None = None + if http_port_raw: + try: + http_port = 
int(http_port_raw) + except ValueError: + http_port = None + + http_token = getenv("BOT_HTTP_TOKEN") + alert_webhook = getenv("BOT_ALERT_WEBHOOK") + rate_limit_seconds = float(getenv("BOT_RATE_LIMIT", "1.0")) + global_rate_limit_seconds = float(getenv("BOT_GLOBAL_RATE_LIMIT", "0.5")) + default_verbosity = getenv("BOT_DEFAULT_VERBOSITY", "high-only") + + cfg = TelegramBotConfig( + bot_token=token, + webhook_url=webhook, + allowlisted_user_ids=allowlist, + db_path=getenv("BOT_DB_PATH", "bot_state.sqlite"), + root_path=getenv("STRIX_ROOT", "."), + http_host=http_host, + http_port=http_port, + http_token=http_token, + alert_webhook=alert_webhook, + rate_limit_seconds=rate_limit_seconds, + global_rate_limit_seconds=global_rate_limit_seconds, + default_verbosity=default_verbosity, + ) + + if not cfg.bot_token: + raise ValueError("BOT_TOKEN is required") + if not cfg.webhook_url: + raise ValueError("WEBHOOK_URL is required") + if not cfg.allowlisted_user_ids: + raise ValueError("ALLOWLIST_IDS is required (comma-separated Telegram user IDs)") + + return cfg diff --git a/strix/bot/control_api.py b/strix/bot/control_api.py new file mode 100644 index 00000000..19a59320 --- /dev/null +++ b/strix/bot/control_api.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Callable + + +@dataclass +class RunInfo: + run_id: str + target: str + status: str + severity_summary: Optional[Dict[str, int]] = None + started_at: Optional[str] = None + instruction: Optional[str] = None + + +class ControlAPI: + """ + Thin abstraction for bot handlers to interact with Strix without spawning CLI. + Implementations should wire into existing Strix internals. 
+ """ + + def start_run( + self, + target: str, + instruction: str | None = None, + verbosity: str | None = None, + stream_callback: Optional[Callable[[str, str, str, str], None]] = None, + ) -> RunInfo: + raise NotImplementedError + + def list_runs(self, limit: int = 20) -> List[RunInfo]: + raise NotImplementedError + + def get_run_info(self, run_id: str) -> RunInfo | None: + raise NotImplementedError + + def tail_logs(self, run_id: str, offset: int = 0, limit: int = 200) -> List[str]: + raise NotImplementedError + + def get_report_summary(self, run_id: str) -> str: + raise NotImplementedError + + def get_report_file(self, run_id: str) -> str | None: + raise NotImplementedError + + def get_file_metadata(self, run_id: str, path: str) -> tuple[str, int] | None: + raise NotImplementedError + + def list_files(self, run_id: str, path: str = "") -> List[Dict[str, Any]]: + raise NotImplementedError + + def read_file(self, run_id: str, path: str) -> bytes: + raise NotImplementedError + + def resume_run(self, run_id: str, stream_callback: Optional[Callable[[str, str, str, str], None]] = None) -> bool: + raise NotImplementedError + + def stop_run(self, run_id: str) -> bool: + raise NotImplementedError diff --git a/strix/bot/fs_api.py b/strix/bot/fs_api.py new file mode 100644 index 00000000..0f553b62 --- /dev/null +++ b/strix/bot/fs_api.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import os +import time +from pathlib import Path +from typing import Any, Dict, List + +from .control_api import ControlAPI, RunInfo + + +class FileSystemControlAPI(ControlAPI): + """ + File-system backed control API for read-only operations on existing runs. + Start/stop/resume are not implemented here and should be wired to Strix internals. 
+ """ + + def __init__(self, root_path: str | Path = ".", cache_ttl: float = 10.0) -> None: + self.root_path = Path(root_path).resolve() + self.runs_dir = self.root_path / "strix_runs" + self.cache_ttl = cache_ttl + self._runs_cache: list[RunInfo] = [] + self._runs_cache_ts: float = 0.0 + + def _run_path(self, run_id: str) -> Path: + return (self.runs_dir / run_id).resolve() + + def _safe_path(self, run_id: str, subpath: str = "") -> Path: + base = self._run_path(run_id) + target = (base / subpath).resolve() + if not str(target).startswith(str(base)): + raise ValueError("Invalid path") + return target + + def start_run( + self, + target: str, + instruction: str | None = None, + verbosity: str | None = None, + stream_callback: Optional[Callable[[str, str, str, str], None]] = None, + ) -> RunInfo: + raise NotImplementedError("Start run not implemented in FileSystemControlAPI.") + + def list_runs(self, limit: int = 20) -> List[RunInfo]: + now = time.monotonic() + if self._runs_cache and now - self._runs_cache_ts < self.cache_ttl: + return self._runs_cache[:limit] + + if not self.runs_dir.exists(): + return [] + entries = [ + (p, p.stat().st_mtime) + for p in self.runs_dir.iterdir() + if p.is_dir() + ] + entries.sort(key=lambda x: x[1], reverse=True) + runs: list[RunInfo] = [] + for path, _ in entries[:limit]: + runs.append( + RunInfo( + run_id=path.name, + target="unknown", + status="unknown", + ) + ) + self._runs_cache = runs + self._runs_cache_ts = now + return runs + + def get_run_info(self, run_id: str) -> RunInfo | None: + path = self._run_path(run_id) + if not path.exists(): + return None + return RunInfo(run_id=run_id, target="unknown", status="unknown") + + def tail_logs(self, run_id: str, offset: int = 0, limit: int = 200) -> List[str]: + path = self._safe_path(run_id) + log_candidates = [ + path / "stdout.log", + path / "logs.txt", + path / "log.txt", + path / "run.log", + ] + log_file = next((p for p in log_candidates if p.exists()), None) + if not 
log_file: + return [] + with open(log_file, "r", encoding="utf-8", errors="ignore") as f: + lines = f.readlines() + return [line.rstrip("\n") for line in lines[offset: offset + limit]] + + def get_report_summary(self, run_id: str) -> str: + path = self._safe_path(run_id) + candidates = [ + path / "report.txt", + path / "report.md", + path / "report.html", + ] + report_file = next((p for p in candidates if p.exists()), None) + if not report_file: + return "" + with open(report_file, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + return content[:4000] + + def get_report_file(self, run_id: str) -> str | None: + path = self._safe_path(run_id) + candidates = [ + path / "report.txt", + path / "report.md", + path / "report.html", + path / "report.json", + path / "report.pdf", + ] + report_file = next((p for p in candidates if p.exists()), None) + return str(report_file) if report_file else None + + def get_file_metadata(self, run_id: str, path: str) -> tuple[str, int] | None: + file_path = self._safe_path(run_id, path) + if not file_path.exists() or not file_path.is_file(): + return None + return str(file_path), file_path.stat().st_size + + def list_files(self, run_id: str, path: str = "") -> List[Dict[str, Any]]: + base = self._safe_path(run_id, path) + if not base.exists() or not base.is_dir(): + return [] + results: list[Dict[str, Any]] = [] + for entry in base.iterdir(): + results.append( + { + "name": entry.name, + "path": os.path.relpath(entry, self._run_path(run_id)), + "is_dir": entry.is_dir(), + "size": entry.stat().st_size, + } + ) + return results + + def read_file(self, run_id: str, path: str) -> bytes: + file_path = self._safe_path(run_id, path) + if not file_path.exists() or not file_path.is_file(): + raise FileNotFoundError("File not found") + return file_path.read_bytes() + + def resume_run(self, run_id: str) -> bool: + raise NotImplementedError("Resume not implemented in FileSystemControlAPI.") + + def stop_run(self, run_id: str) -> 
bool: + raise NotImplementedError("Stop not implemented in FileSystemControlAPI.") diff --git a/strix/bot/main.py b/strix/bot/main.py new file mode 100644 index 00000000..8e8c74a4 --- /dev/null +++ b/strix/bot/main.py @@ -0,0 +1,35 @@ +import argparse +import logging +import os + +from .config import load_config +from .fs_api import FileSystemControlAPI +from .service import run +from .strix_control_api import StrixControlAPI + + +def build_control_api(mode: str, root: str) -> object: + if mode == "fs": + return FileSystemControlAPI(root_path=root) + return StrixControlAPI(root_path=root) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Strix Telegram bot service") + parser.add_argument( + "--mode", + choices=["strix", "fs"], + default=os.getenv("BOT_MODE", "strix"), + help="Control mode: strix (start/stop runs) or fs (read-only browsing).", + ) + args = parser.parse_args() + + logging.basicConfig(level=logging.INFO) + + cfg = load_config() + control_api = build_control_api(args.mode, cfg.root_path) + run(control_api, cfg) + + +if __name__ == "__main__": + main() diff --git a/strix/bot/service.py b/strix/bot/service.py new file mode 100644 index 00000000..ddb7d317 --- /dev/null +++ b/strix/bot/service.py @@ -0,0 +1,703 @@ +import asyncio +import json +import logging +import os +import time +from typing import Any, Callable, Optional + +from aiohttp import ClientSession, web +from aiogram import Bot, Dispatcher, F +from aiogram.enums import ParseMode +from aiogram.exceptions import TelegramBadRequest +from aiogram.filters import Command, CommandObject +from aiogram.types import CallbackQuery, InlineKeyboardButton, InlineKeyboardMarkup, Message + +from .config import TelegramBotConfig +from .control_api import ControlAPI +from .state import BotState + +logger = logging.getLogger(__name__) + +MAX_FILE_SIZE_BYTES = 45 * 1024 * 1024 # stay under Telegram limit with buffer +MAX_LIST_ITEMS = 12 +MAX_MESSAGE_CHARS = 3500 +SEVERITY_LEVEL = 
{"critical": 4, "high": 3, "medium": 2, "low": 1, "info": 0} +SEVERITY_ICON = {"critical": "πŸ”₯", "high": "πŸ”΄", "medium": "🟠", "low": "🟒", "info": "ℹ️"} +BATCH_INTERVAL_SECONDS = 5.0 +REDACT_PATTERNS = [("sk-", 3)] + + +def _is_allowed(user_id: int, config: TelegramBotConfig) -> bool: + return user_id in config.allowlisted_user_ids + + +def fetch_tail_page(control_api: ControlAPI, run_id: str, offset: int, page_size: int) -> tuple[list[str], bool, int]: + """Return a page of logs, whether more remain, and the next offset.""" + logs = control_api.tail_logs(run_id, offset=offset, limit=page_size + 1) + has_more = len(logs) > page_size + page = logs[:page_size] + next_offset = offset + len(page) + return page, has_more, next_offset + + +class RateLimiter: + def __init__(self, min_interval_seconds: float = 1.0) -> None: + self.min_interval = min_interval_seconds + self.last_seen: dict[int, float] = {} + + def allow(self, user_id: int) -> bool: + now = time.monotonic() + last = self.last_seen.get(user_id, 0.0) + if now - last < self.min_interval: + return False + self.last_seen[user_id] = now + return True + + +class GlobalRateLimiter: + def __init__(self, min_interval_seconds: float = 0.5) -> None: + self.min_interval = min_interval_seconds + self.last_seen = 0.0 + + def allow(self) -> bool: + now = time.monotonic() + if now - self.last_seen < self.min_interval: + return False + self.last_seen = now + return True + + +class Metrics: + def __init__(self) -> None: + self.counters: dict[str, int] = {} + self.errors: dict[str, int] = {} + self.latencies_ms: list[float] = [] + + def inc(self, name: str) -> None: + self.counters[name] = self.counters.get(name, 0) + 1 + + def error(self, name: str) -> None: + self.errors[name] = self.errors.get(name, 0) + 1 + + def add_latency(self, ms: float) -> None: + self.latencies_ms.append(ms) + + +class AlertSink: + def __init__(self, webhook: str | None = None) -> None: + self.webhook = webhook + + async def notify(self, kind: str, 
details: dict[str, Any]) -> None: + if not self.webhook: + return + payload = {"kind": kind, "details": details, "ts": time.time()} + try: + async with ClientSession() as session: + await session.post(self.webhook, json=payload, timeout=5) + except Exception: # noqa: BLE001 + logger.exception("Failed to send alert kind=%s", kind) + + +def create_dispatcher(control_api: ControlAPI, config: TelegramBotConfig) -> Dispatcher: + dp = Dispatcher() + rate_limiter = RateLimiter(min_interval_seconds=config.rate_limit_seconds) + global_limiter = GlobalRateLimiter(min_interval_seconds=config.global_rate_limit_seconds) + metrics = Metrics() + alert_sink = AlertSink(config.alert_webhook) + dp.metrics = metrics # type: ignore[attr-defined] + state = BotState(config.db_path) + + run_verbosity: dict[str, str] = {} + run_batches: dict[str, list[str]] = {} + batch_tasks: dict[str, asyncio.Task[Any]] = {} + doc_hint_ts: dict[int, float] = {} + + def redact(text: str) -> str: + masked = text + for prefix, visible in REDACT_PATTERNS: + idx = masked.find(prefix) + while idx != -1: + end = idx + len(prefix) + 16 + masked = masked[: idx + visible] + "***REDACTED***" + masked[end:] + idx = masked.find(prefix, end) + return masked + + dp["redact"] = redact + + def format_alert(sev: str, title: str, content: str) -> str: + icon = SEVERITY_ICON.get(sev, "ℹ️") + text = f"{icon} *{sev.upper()}* {title}\n```\n{content}\n```" + if len(text) > MAX_MESSAGE_CHARS: + text = text[:MAX_MESSAGE_CHARS] + "\n\n(truncated)" + return text + + def build_file_kb(run_id: str, files: list[dict[str, Any]], rel_path: str = "") -> list[list[InlineKeyboardButton]]: + kb_rows: list[list[InlineKeyboardButton]] = [] + if rel_path: + parent = os.path.normpath(os.path.join(rel_path, "..")) + kb_rows.append( + [ + InlineKeyboardButton( + text="⬆️ ..", + callback_data=f"file_nav:{run_id}:{'' if parent == '.' 
else parent}", + ) + ] + ) + for entry in files[:MAX_LIST_ITEMS]: + label = ("D " if entry.get("is_dir") else "F ") + entry.get("name", "") + entry_rel = entry.get("path", "") + action = "file_nav" if entry.get("is_dir") else "file_dl" + kb_rows.append( + [ + InlineKeyboardButton( + text=label, + callback_data=f"{action}:{run_id}:{entry_rel}", + ) + ] + ) + return kb_rows + + async def maybe_doc_hint(message: Message, topic: str = "troubleshooting") -> None: + now = time.monotonic() + chat_id = message.chat.id if message.chat else 0 + if now - doc_hint_ts.get(chat_id, 0.0) < 60: + return + doc_hint_ts[chat_id] = now + await message.answer(f"Need help? Try `/docs {topic}`", parse_mode=ParseMode.MARKDOWN) + + async def guard(message: Message, handler: Callable[[], Any]) -> None: + start = time.monotonic() + user_id = message.from_user.id if message.from_user else 0 + if not _is_allowed(user_id, config): + await message.answer("Access denied.") + return + if not global_limiter.allow(): + metrics.error("global_rate_limit") + await message.answer("System busy. Please retry shortly.") + return + if not rate_limiter.allow(user_id): + metrics.error("rate_limit") + await message.answer("Rate limited. Please slow down.") + return + metrics.inc("command") + red_text = redact(message.text or "") + logger.info("audit_command user_id=%s text=%s", user_id, red_text) + try: + await handler() + except Exception: # noqa: BLE001 + metrics.error("handler_error") + logger.exception("Handler error") + await message.answer("Unexpected error. 
Try again or see /docs troubleshooting.") + await maybe_doc_hint(message, topic="troubleshooting") + asyncio.create_task( + alert_sink.notify( + "handler_error", + {"user_id": user_id, "text": red_text}, + ) + ) + finally: + metrics.add_latency((time.monotonic() - start) * 1000) + + @dp.message(Command(commands=["start", "help"])) + async def cmd_help(message: Message) -> None: + async def run() -> None: + text = ( + "Strix Telegram bot.\n" + "/health\n" + "/newrun [instruction]\n" + "/runs [query]\n" + "/run info|tail|report|files|docs\n" + "/resume \n" + "/stop \n" + "/verbosity \n" + "/docs \n" + ) + await message.answer(text) + + await guard(message, run) + + @dp.message(Command(commands=["health"])) + async def cmd_health(message: Message) -> None: + async def run() -> None: + await message.answer("ok") + + await guard(message, run) + + @dp.message(Command(commands=["metrics"])) + async def cmd_metrics(message: Message) -> None: + async def run() -> None: + lines = ["Counters:"] + for k, v in metrics.counters.items(): + lines.append(f"{k}: {v}") + lines.append("Errors:") + for k, v in metrics.errors.items(): + lines.append(f"{k}: {v}") + await message.answer("\n".join(lines)) + + await guard(message, run) + + @dp.message(Command(commands=["newrun"])) + async def cmd_newrun(message: Message, command: CommandObject) -> None: + async def run() -> None: + args = command.args or "" + parts = args.split(" ", 1) + if not parts or not parts[0]: + await message.answer("Usage: /newrun [instruction]") + return + target = parts[0] + instruction = parts[1] if len(parts) > 1 else None + try: + chat_id = message.chat.id + run_id: Optional[str] = None + + async def flush_batch(rid: str) -> None: + await asyncio.sleep(BATCH_INTERVAL_SECONDS) + texts = run_batches.get(rid, []) + if not texts: + return + combined = "\n\n".join(texts) + if len(combined) > MAX_MESSAGE_CHARS: + combined = combined[:MAX_MESSAGE_CHARS] + "\n\n(truncated batch)" + await 
message.bot.send_message(chat_id=chat_id, text=combined, parse_mode=ParseMode.MARKDOWN) + run_batches[rid] = [] + batch_tasks.pop(rid, None) + + def stream_callback(report_id: str, title: str, content: str, severity: str) -> None: + sev = severity.lower() + level = SEVERITY_LEVEL.get(sev, 0) + mode = run_verbosity.get(run_id or "", state.get_verbosity(run_id or "") or "high-only") + if mode == "high-only" and level < 3: + return + red_title = redact(title) + red_content = redact(content) + text = format_alert(sev, red_title, red_content[:1200]) + if mode == "batched": + buf = run_batches.setdefault(run_id or "", []) + buf.append(text) + if run_id and run_id not in batch_tasks: + batch_tasks[run_id] = asyncio.create_task(flush_batch(run_id)) + else: + asyncio.create_task( + message.bot.send_message(chat_id=chat_id, text=text, parse_mode=ParseMode.MARKDOWN) + ) + + run_info = control_api.start_run(target=target, instruction=instruction, stream_callback=stream_callback) + run_id = run_info.run_id + logger.info("bot_run_started run_id=%s target=%s instruction=%s", run_id, target, (instruction or "").strip()) + default_mode = state.get_verbosity(run_id) or config.default_verbosity or "high-only" + run_verbosity[run_id] = default_mode + await message.answer(f"Started run {run_info.run_id} for target {run_info.target}") + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to start run") + await message.answer(f"Failed to start run: {exc}") + await maybe_doc_hint(message) + + await guard(message, run) + + @dp.message(Command(commands=["runs"])) + async def cmd_runs(message: Message, command: CommandObject) -> None: + async def run() -> None: + query = (command.args or "").strip().lower() + runs = control_api.list_runs() + if query: + runs = [r for r in runs if query in r.run_id.lower() or query in r.target.lower()] + if not runs: + await message.answer("No runs found.") + return + kb_rows = [] + for r in runs[:MAX_LIST_ITEMS]: + kb_rows.append( + [ + 
InlineKeyboardButton( + text=f"{r.run_id} ({r.status})", + callback_data=f"run_info:{r.run_id}", + ) + ] + ) + await message.answer("Select a run:", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows)) + + await guard(message, run) + + @dp.message(Command(commands=["run"])) + async def cmd_run(message: Message, command: CommandObject) -> None: + async def run() -> None: + args = (command.args or "").split() + if len(args) < 2: + await message.answer("Usage: /run ") + await maybe_doc_hint(message) + return + run_id, subcmd = args[0], args[1] + if subcmd == "info": + info = control_api.get_run_info(run_id) + if not info: + await message.answer("Run not found.") + await maybe_doc_hint(message) + return + await message.answer(f"{info.run_id} - {info.target} - {info.status}") + elif subcmd == "tail": + logs, has_more, next_offset = fetch_tail_page(control_api, run_id, offset=0, page_size=50) + kb_rows: list[list[InlineKeyboardButton]] = [] + if has_more: + kb_rows.append( + [ + InlineKeyboardButton( + text="Tail more", + callback_data=f"tail_more:{run_id}:{next_offset}", + ) + ] + ) + await message.answer("\n".join(logs) if logs else "No logs.", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows) if kb_rows else None) + elif subcmd == "report": + summary = control_api.get_report_summary(run_id) + if summary and len(summary) > MAX_MESSAGE_CHARS: + summary = summary[:MAX_MESSAGE_CHARS] + "\n\n(truncated)" + kb = InlineKeyboardMarkup( + inline_keyboard=[ + [ + InlineKeyboardButton( + text="Send full report", + callback_data=f"report_full:{run_id}", + ) + ] + ] + ) + await message.answer(summary or "No report yet.", reply_markup=kb) + elif subcmd == "files": + files = control_api.list_files(run_id) + if not files: + await message.answer("No files.") + return + kb_rows = build_file_kb(run_id, files, rel_path="") + await message.answer("Select file or directory:", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows)) + elif subcmd == "docs": + await 
message.answer("Use /docs to fetch documentation excerpts.") + else: + await message.answer("Unknown subcommand.") + + await guard(message, run) + + @dp.message(Command(commands=["resume"])) + async def cmd_resume(message: Message, command: CommandObject) -> None: + async def run() -> None: + run_id = (command.args or "").strip() + if not run_id: + await message.answer("Usage: /resume ") + await maybe_doc_hint(message) + return + chat_id = message.chat.id + + def stream_callback(report_id: str, title: str, content: str, severity: str) -> None: + sev = severity.lower() + level = SEVERITY_LEVEL.get(sev, 0) + mode = run_verbosity.get(run_id, state.get_verbosity(run_id) or config.default_verbosity or "high-only") + if mode == "high-only" and level < 3: + return + text = format_alert(sev, redact(title), redact(content)[:1200]) + if mode == "batched": + buf = run_batches.setdefault(run_id, []) + buf.append(text) + if run_id not in batch_tasks: + batch_tasks[run_id] = asyncio.create_task(flush_batch_shared(run_id, chat_id)) + else: + asyncio.create_task(message.bot.send_message(chat_id=chat_id, text=text, parse_mode=ParseMode.MARKDOWN)) + + async def flush_batch_shared(rid: str, cid: int) -> None: + await asyncio.sleep(BATCH_INTERVAL_SECONDS) + texts = run_batches.get(rid, []) + if not texts: + return + combined = "\n\n".join(texts) + if len(combined) > MAX_MESSAGE_CHARS: + combined = combined[:MAX_MESSAGE_CHARS] + "\n\n(truncated batch)" + await message.bot.send_message(chat_id=cid, text=combined, parse_mode=ParseMode.MARKDOWN) + run_batches[rid] = [] + batch_tasks.pop(rid, None) + + try: + ok = control_api.resume_run(run_id, stream_callback=stream_callback) + if ok: + mode = state.get_verbosity(run_id) or config.default_verbosity or "high-only" + run_verbosity[run_id] = mode + await message.answer(f"Resumed streaming for {run_id} with verbosity {mode}.") + else: + await message.answer(f"Run {run_id} not active; cannot resume. 
Consider starting a new run.") + await maybe_doc_hint(message) + except NotImplementedError: + await message.answer("Resume is not supported yet.") + await maybe_doc_hint(message) + + await guard(message, run) + + @dp.message(Command(commands=["stop"])) + async def cmd_stop(message: Message, command: CommandObject) -> None: + async def run() -> None: + run_id = (command.args or "").strip() + if not run_id: + await message.answer("Usage: /stop ") + await maybe_doc_hint(message) + return + try: + if control_api.stop_run(run_id): + await message.answer(f"Stopped {run_id}") + else: + await message.answer(f"Could not stop {run_id}") + await maybe_doc_hint(message) + except NotImplementedError: + await message.answer("Stop is not supported yet.") + await maybe_doc_hint(message) + + await guard(message, run) + + @dp.message(Command(commands=["verbosity"])) + async def cmd_verbosity(message: Message, command: CommandObject) -> None: + async def run() -> None: + args = (command.args or "").split() + if len(args) != 2: + await message.answer("Usage: /verbosity ") + return + run_id, mode = args + if mode not in {"high-only", "batched", "full"}: + await message.answer("Mode must be one of: high-only, batched, full.") + return + run_verbosity[run_id] = mode + state.set_verbosity(run_id, mode) + kb = InlineKeyboardMarkup( + inline_keyboard=[ + [ + InlineKeyboardButton(text="High-only", callback_data=f"verbosity:{run_id}:high-only"), + InlineKeyboardButton(text="Batched", callback_data=f"verbosity:{run_id}:batched"), + InlineKeyboardButton(text="Full", callback_data=f"verbosity:{run_id}:full"), + ] + ] + ) + await message.answer(f"Verbosity for {run_id} set to {mode}.", reply_markup=kb) + + await guard(message, run) + + @dp.message(F.text) + async def fallback(message: Message) -> None: + await message.answer("Unrecognized command. 
Send /help.") + + @dp.callback_query(F.data.startswith("report_full:")) + async def report_full(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 1) + if len(parts) != 2: + await cb.answer() + return + run_id = parts[1] + try: + file_path = control_api.get_report_file(run_id) + if not file_path: + await cb.message.answer("Report file not found.") + await cb.answer() + return + size = os.path.getsize(file_path) + if size > MAX_FILE_SIZE_BYTES: + await cb.message.answer("Report too large to send. Please retrieve manually.") + await cb.answer() + return + with open(file_path, "rb") as fh: + logger.info("bot_report_send run_id=%s path=%s size=%s", run_id, file_path, size) + await cb.message.answer_document(document=fh) + except TelegramBadRequest as exc: + await cb.message.answer(f"Failed to send report: {exc}") + asyncio.create_task( + alert_sink.notify( + "delivery_error", + {"run_id": run_id, "path": file_path, "error": str(exc)}, + ) + ) + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to send report") + await cb.message.answer(f"Failed to send report: {exc}") + asyncio.create_task(alert_sink.notify("delivery_error", {"run_id": run_id, "path": file_path, "error": str(exc)})) + await cb.answer() + + @dp.callback_query(F.data.startswith("run_info:")) + async def run_info_cb(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 1) + if len(parts) != 2: + await cb.answer() + return + run_id = parts[1] + info = control_api.get_run_info(run_id) + if not info: + await cb.message.answer("Run not found.") + await cb.answer() + return + text = f"{info.run_id}\nTarget: {info.target}\nStatus: {info.status}" + await cb.message.answer(text) + await cb.answer() + + @dp.callback_query(F.data.startswith("tail_more:")) + async def tail_more(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, offset_str = parts[1], parts[2] + 
try: + offset = int(offset_str) + except ValueError: + await cb.answer() + return + logs, has_more, next_offset = fetch_tail_page(control_api, run_id, offset=offset, page_size=50) + kb_rows: list[list[InlineKeyboardButton]] = [] + if has_more: + kb_rows.append( + [ + InlineKeyboardButton( + text="Tail more", + callback_data=f"tail_more:{run_id}:{next_offset}", + ) + ] + ) + await cb.message.answer( + "\n".join(logs) if logs else "No logs.", + reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows) if kb_rows else None, + ) + await cb.answer() + + @dp.callback_query(F.data.startswith("file_nav:")) + async def file_nav(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, rel_path = parts[1], parts[2] + try: + files = control_api.list_files(run_id, rel_path) + if not files: + await cb.message.answer("No files.") + await cb.answer() + return + kb_rows = build_file_kb(run_id, files, rel_path=rel_path) + await cb.message.answer(f"Browsing `{rel_path or '.'}`", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows)) + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to browse files") + await cb.message.answer(f"Failed to browse files: {exc}") + await cb.answer() + + @dp.callback_query(F.data.startswith("file_dl:")) + async def file_dl(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, rel_path = parts[1], parts[2] + try: + meta = control_api.get_file_metadata(run_id, rel_path) + if not meta: + await cb.message.answer("File not found.") + await cb.answer() + return + file_path, size = meta + if size > MAX_FILE_SIZE_BYTES: + await cb.message.answer("File too large to send. 
Please fetch manually.") + await cb.answer() + return + with open(file_path, "rb") as fh: + logger.info("bot_file_send run_id=%s path=%s size=%s", run_id, rel_path, size) + await cb.message.answer_document(document=fh) + except TelegramBadRequest as exc: + await cb.message.answer(f"Failed to send file: {exc}") + asyncio.create_task( + alert_sink.notify( + "delivery_error", + {"run_id": run_id, "path": rel_path, "error": str(exc)}, + ) + ) + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to send file") + await cb.message.answer(f"Failed to send file: {exc}") + asyncio.create_task(alert_sink.notify("delivery_error", {"run_id": run_id, "path": rel_path, "error": str(exc)})) + await cb.answer() + + @dp.callback_query(F.data.startswith("verbosity:")) + async def verbosity_cb(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, mode = parts[1], parts[2] + if mode not in {"high-only", "batched", "full"}: + await cb.answer() + return + run_verbosity[run_id] = mode + state.set_verbosity(run_id, mode) + await cb.message.answer(f"Verbosity for {run_id} set to {mode}.") + await cb.answer() + + return dp + + +def build_http_app(metrics: Metrics, token: str | None = None) -> web.Application: + app = web.Application() + + async def _auth(request: web.Request) -> bool: + if not token: + return True + provided = request.headers.get("Authorization", "") + return provided == f"Bearer {token}" + + async def health_handler(_: web.Request) -> web.Response: + return web.Response(text="ok") + + async def metrics_handler(request: web.Request) -> web.Response: + if not await _auth(request): + return web.Response(status=403) + fmt = request.query.get("format", "json") + counters = metrics.counters + errors = metrics.errors + latencies = metrics.latencies_ms + if fmt == "prom": + lines = [] + for k, v in counters.items(): + lines.append(f"strix_bot_counter{{name=\"{k}\"}} {v}") + for k, 
v in errors.items(): + lines.append(f"strix_bot_error_total{{name=\"{k}\"}} {v}") + if latencies: + avg = sum(latencies) / len(latencies) + lines.append(f"strix_bot_command_latency_ms_avg {avg:.2f}") + return web.Response(text="\n".join(lines), content_type="text/plain") + avg_latency = sum(latencies) / len(latencies) if latencies else 0.0 + return web.json_response({"counters": counters, "errors": errors, "avg_latency_ms": avg_latency}) + + app.add_routes( + [ + web.get("/healthz", health_handler), + web.get("/health", health_handler), + web.get("/metrics", metrics_handler), + ] + ) + return app + + +async def run_bot(control_api: ControlAPI, config: TelegramBotConfig) -> None: + bot = Bot(token=config.bot_token) + dp = create_dispatcher(control_api, config) + runner: web.AppRunner | None = None + if config.http_port: + app = build_http_app(dp.metrics, token=config.http_token) # type: ignore[arg-type, attr-defined] + runner = web.AppRunner(app) + await runner.setup() + site = web.TCPSite(runner, host=config.http_host or "0.0.0.0", port=config.http_port) + await site.start() + logger.info("HTTP server started on %s:%s", config.http_host or "0.0.0.0", config.http_port) + + if not config.webhook_url: + raise RuntimeError("Webhook URL not configured.") + await bot.set_webhook(config.webhook_url) + await dp.start_polling(bot) + if runner: + await runner.cleanup() + + +def run(control_api: ControlAPI, config: TelegramBotConfig) -> None: + asyncio.run(run_bot(control_api, config)) diff --git a/strix/bot/state.py b/strix/bot/state.py new file mode 100644 index 00000000..48685856 --- /dev/null +++ b/strix/bot/state.py @@ -0,0 +1,46 @@ +import sqlite3 +from pathlib import Path +from typing import Optional + + +class BotState: + """ + Lightweight SQLite-backed state for run preferences (e.g., verbosity). 
+ """ + + def __init__(self, db_path: str) -> None: + self.db_path = db_path + Path(db_path).parent.mkdir(parents=True, exist_ok=True) + self._init_db() + + def _init_db(self) -> None: + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """ + CREATE TABLE IF NOT EXISTS run_settings ( + run_id TEXT PRIMARY KEY, + verbosity TEXT + ) + """ + ) + conn.commit() + + def set_verbosity(self, run_id: str, verbosity: str) -> None: + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """ + INSERT INTO run_settings (run_id, verbosity) + VALUES (?, ?) + ON CONFLICT(run_id) DO UPDATE SET verbosity=excluded.verbosity + """, + (run_id, verbosity), + ) + conn.commit() + + def get_verbosity(self, run_id: str) -> Optional[str]: + with sqlite3.connect(self.db_path) as conn: + cur = conn.execute( + "SELECT verbosity FROM run_settings WHERE run_id = ?", (run_id,) + ) + row = cur.fetchone() + return row[0] if row else None diff --git a/strix/bot/strix_control_api.py b/strix/bot/strix_control_api.py new file mode 100644 index 00000000..d4149617 --- /dev/null +++ b/strix/bot/strix_control_api.py @@ -0,0 +1,264 @@ +from __future__ import annotations + +import asyncio +import logging +import os +from pathlib import Path +from typing import Any, Dict, List, Optional +from datetime import datetime + +from strix.interface.main import build_targets_info # type: ignore +from strix.interface.utils import generate_run_name # type: ignore +from strix.llm.config import LLMConfig +from strix.telemetry.tracer import Tracer, set_global_tracer + +from strix.agents.StrixAgent import StrixAgent +from strix.agents.iteration_policy import calculate_iteration_budget +from .control_api import ControlAPI, RunInfo + +logger = logging.getLogger(__name__) + + +class StrixControlAPI(ControlAPI): + """ + Control API that starts Strix runs via internal interfaces. + Note: stop/resume/status are minimal; enhance with runtime hooks. 
+ """ + + def __init__(self, root_path: str | Path = ".") -> None: + self.root_path = Path(root_path).resolve() + self.runs_dir = self.root_path / "strix_runs" + self.active: Dict[str, dict[str, Any]] = {} + + def start_run( + self, + target: str, + instruction: str | None = None, + verbosity: str | None = None, + stream_callback: Optional[Callable[[str, str, str, str], None]] = None, + ) -> RunInfo: + run_name = generate_run_name() + targets_info = build_targets_info([target]) + scan_config = { + "scan_id": run_name, + "targets": targets_info, + "user_instructions": instruction or "", + "run_name": run_name, + } + tracer = Tracer(run_name) + tracer.set_scan_config(scan_config) + set_global_tracer(tracer) + + if stream_callback: + def vuln_handler(report_id: str, title: str, content: str, severity: str) -> None: + try: + stream_callback(report_id, title, content, severity) + except Exception: # noqa: BLE001 + logger.exception("Stream callback failed") + + tracer.vulnerability_found_callback = vuln_handler + + llm_config = LLMConfig() + iteration_policy = calculate_iteration_budget(targets_info, llm_config.timeout) + agent_config = { + "llm_config": llm_config, + "max_iterations": iteration_policy["max_iterations"], + "iteration_policy": iteration_policy, + "non_interactive": True, + } + tracer.set_iteration_policy(iteration_policy) + agent = StrixAgent(agent_config) + + async def runner() -> None: + try: + await agent.run() + if run_name in self.active: + self.active[run_name]["status"] = "completed" + self.active[run_name]["ended_at"] = datetime.utcnow().isoformat() + except Exception: # noqa: BLE001 + logger.exception("Run failed for %s", run_name) + if run_name in self.active: + self.active[run_name]["status"] = "failed" + self.active[run_name]["ended_at"] = datetime.utcnow().isoformat() + + task = asyncio.create_task(runner()) + self.active[run_name] = { + "agent": agent, + "tracer": tracer, + "targets": targets_info, + "status": "running", + "task": task, + 
"started_at": datetime.utcnow().isoformat(), + } + return RunInfo(run_id=run_name, target=target, status="running", instruction=instruction) + + def list_runs(self, limit: int = 20) -> List[RunInfo]: + self._reap_finished() + runs: list[RunInfo] = [] + for run_id, info in list(self.active.items())[:limit]: + runs.append(self._build_run_info(run_id, info)) + + # Fill with filesystem runs not in active list + if len(runs) < limit and self.runs_dir.exists(): + existing_ids = {r.run_id for r in runs} + for path in sorted(self.runs_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True): + if not path.is_dir(): + continue + if path.name in existing_ids: + continue + runs.append( + RunInfo( + run_id=path.name, + target="unknown", + status="unknown", + ) + ) + if len(runs) >= limit: + break + return runs + + def get_run_info(self, run_id: str) -> RunInfo | None: + self._reap_finished() + info = self.active.get(run_id) + if not info: + # fallback to filesystem presence + path = self.runs_dir / run_id + if path.exists(): + return RunInfo(run_id=run_id, target="unknown", status="unknown") + return None + return self._build_run_info(run_id, info) + + def tail_logs(self, run_id: str, offset: int = 0, limit: int = 200) -> List[str]: + path = self.runs_dir / run_id + candidates = [ + path / "stdout.log", + path / "logs.txt", + path / "log.txt", + path / "run.log", + ] + log_file = next((p for p in candidates if p.exists()), None) + if not log_file: + return [] + with open(log_file, "r", encoding="utf-8", errors="ignore") as f: + lines = f.readlines() + return [line.rstrip("\n") for line in lines[offset: offset + limit]] + + def get_report_summary(self, run_id: str) -> str: + path = self.runs_dir / run_id + report_file = self._find_report_file(path) + if not report_file: + return "" + with open(report_file, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + return content[:4000] + + def get_report_file(self, run_id: str) -> str | None: + path = 
def get_file_metadata(self, run_id: str, path: str) -> tuple[str, int] | None:
    """Resolve *path* inside the run directory; return (abs_path, size) or None.

    Containment uses ``Path.is_relative_to``: the previous plain
    ``str.startswith`` check wrongly accepted sibling directories that
    share a name prefix (e.g. ``/runs/r1-evil`` vs ``/runs/r1``).
    """
    base = (self.runs_dir / run_id).resolve()
    target = (base / path).resolve()
    if not target.is_relative_to(base) or not target.is_file():
        return None
    return str(target), target.stat().st_size

def list_files(self, run_id: str, path: str = "") -> List[Dict[str, Any]]:
    """List directory entries under *path* within the run dir.

    Returns [] when the directory is missing or *path* escapes the run dir.
    """
    root = (self.runs_dir / run_id).resolve()
    base = (root / path).resolve() if path else root
    # is_dir() implies existence; is_relative_to blocks traversal.
    if not base.is_relative_to(root) or not base.is_dir():
        return []
    return [
        {
            "name": entry.name,
            "path": os.path.relpath(entry, root),
            "is_dir": entry.is_dir(),
            "size": entry.stat().st_size,
        }
        for entry in base.iterdir()
    ]

def read_file(self, run_id: str, path: str) -> bytes:
    """Read a file inside the run directory.

    Raises:
        FileNotFoundError: for missing files and for any path that escapes
            the run directory (same containment rule as get_file_metadata).
    """
    base = (self.runs_dir / run_id).resolve()
    target = (base / path).resolve()
    if not target.is_relative_to(base) or not target.is_file():
        raise FileNotFoundError("File not found")
    return target.read_bytes()

def resume_run(
    self,
    run_id: str,
    stream_callback: Optional[Callable[[str, str, str, str], None]] = None,
) -> bool:
    """Re-attach to an active run; True only if its task is still running."""
    self._reap_finished()
    info = self.active.get(run_id)
    if not info:
        return False
    tracer = info.get("tracer")
    if tracer is not None and stream_callback is not None:
        tracer.vulnerability_found_callback = stream_callback
    task = info.get("task")
    if task is not None and not task.done():
        info["status"] = "running"
        return True
    return False

def stop_run(self, run_id: str) -> bool:
    """Request cancellation of a run; True if the run was known at all."""
    self._reap_finished()
    info = self.active.get(run_id)
    if not info:
        return False
    agent = info.get("agent")
    if hasattr(agent, "cancel"):
        # Prefer the agent's own cancellation hook when it exists.
        agent.cancel()
    else:
        task = info.get("task")
        if task is not None:
            task.cancel()
    info["status"] = "stopped"
    # NOTE: utcnow() is deprecated in 3.12; kept for timestamp-format
    # consistency with the rest of this module.
    info["ended_at"] = datetime.utcnow().isoformat()
    return True
datetime.utcnow().isoformat() + return True + task = info.get("task") + if task: + task.cancel() + info["status"] = "stopped" + info["ended_at"] = datetime.utcnow().isoformat() + return True + # If no cancel available, mark as stopped + info["status"] = "stopped" + info["ended_at"] = datetime.utcnow().isoformat() + return True + + def _build_run_info(self, run_id: str, info: dict[str, Any]) -> RunInfo: + target = info.get("targets", [{}])[0].get("original", "unknown") + status = info.get("status", "running") + ri = RunInfo( + run_id=run_id, + target=target, + status=status, + ) + return ri + + def _find_report_file(self, base: Path) -> Path | None: + candidates = [ + base / "report.txt", + base / "report.md", + base / "report.html", + base / "report.json", + base / "report.pdf", + ] + for path in candidates: + if path.exists(): + return path + return None + + def _reap_finished(self) -> None: + for run_id, info in self.active.items(): + task = info.get("task") + if task and task.done(): + if task.cancelled(): + info["status"] = "stopped" + info["ended_at"] = datetime.utcnow().isoformat() + elif task.exception(): + info["status"] = "failed" + info["ended_at"] = datetime.utcnow().isoformat() + else: + info["status"] = "completed" + info["ended_at"] = datetime.utcnow().isoformat() + # Optionally prune very old entries if needed (not implemented) diff --git a/strix/interface/cli.py b/strix/interface/cli.py index 626cbded..da51087a 100644 --- a/strix/interface/cli.py +++ b/strix/interface/cli.py @@ -11,6 +11,7 @@ from rich.text import Text from strix.agents.StrixAgent import StrixAgent +from strix.agents.iteration_policy import calculate_iteration_budget from strix.llm.config import LLMConfig from strix.telemetry.tracer import Tracer, set_global_tracer @@ -74,9 +75,11 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915 } llm_config = LLMConfig() + iteration_policy = calculate_iteration_budget(args.targets_info, llm_config.timeout) agent_config = { "llm_config": 
llm_config, - "max_iterations": 300, + "max_iterations": iteration_policy["max_iterations"], + "iteration_policy": iteration_policy, "non_interactive": True, } @@ -85,6 +88,7 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915 tracer = Tracer(args.run_name) tracer.set_scan_config(scan_config) + tracer.set_iteration_policy(iteration_policy) def display_vulnerability(report_id: str, title: str, content: str, severity: str) -> None: severity_color = get_severity_color(severity.lower()) diff --git a/strix/interface/run_manager.py b/strix/interface/run_manager.py new file mode 100644 index 00000000..2fb4ca2b --- /dev/null +++ b/strix/interface/run_manager.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import asyncio +from typing import Any, Callable, Coroutine + + +class RunManager: + def __init__(self, max_concurrent: int = 2) -> None: + self.semaphore = asyncio.Semaphore(max_concurrent) + + async def run_with_budget( + self, + tasks: list[tuple[str, Callable[[], Coroutine[Any, Any, Any]]]], + ) -> dict[str, Any]: + results: dict[str, Any] = {} + + async def _wrap(name: str, coro_fn: Callable[[], Coroutine[Any, Any, Any]]) -> None: + async with self.semaphore: + try: + results[name] = await coro_fn() + except Exception as exc: # noqa: BLE001 + results[name] = {"success": False, "error": str(exc)} + + await asyncio.gather(*[_wrap(name, fn) for name, fn in tasks]) + return results diff --git a/strix/interface/tui.py b/strix/interface/tui.py index 1b0bc37f..7dda6075 100644 --- a/strix/interface/tui.py +++ b/strix/interface/tui.py @@ -31,6 +31,7 @@ from textual.widgets.tree import TreeNode from strix.agents.StrixAgent import StrixAgent +from strix.agents.iteration_policy import calculate_iteration_budget from strix.interface.utils import build_live_stats_text from strix.llm.config import LLMConfig from strix.telemetry.tracer import Tracer, set_global_tracer @@ -282,6 +283,8 @@ def __init__(self, args: argparse.Namespace): self.tracer = 
Tracer(self.scan_config["run_name"]) self.tracer.set_scan_config(self.scan_config) + if self.agent_config.get("iteration_policy"): + self.tracer.set_iteration_policy(self.agent_config["iteration_policy"]) set_global_tracer(self.tracer) self.agent_nodes: dict[str, TreeNode] = {} @@ -321,9 +324,11 @@ def _build_scan_config(self, args: argparse.Namespace) -> dict[str, Any]: def _build_agent_config(self, args: argparse.Namespace) -> dict[str, Any]: llm_config = LLMConfig() + iteration_policy = calculate_iteration_budget(args.targets_info, llm_config.timeout) config = { "llm_config": llm_config, - "max_iterations": 300, + "max_iterations": iteration_policy["max_iterations"], + "iteration_policy": iteration_policy, } if getattr(args, "local_sources", None): diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 99a566a4..8a23620e 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -1,5 +1,6 @@ import logging import os +from pathlib import Path from dataclasses import dataclass from enum import Enum from fnmatch import fnmatch @@ -25,15 +26,37 @@ logger = logging.getLogger(__name__) -api_key = os.getenv("LLM_API_KEY") + +def _load_env_file(path: Path) -> dict[str, str]: + env: dict[str, str] = {} + if not path.exists(): + return env + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, val = line.split("=", 1) + env[key.strip()] = val.strip() + return env + + +file_env = _load_env_file(Path(".env")) + +def _getenv(name: str) -> str | None: + if name in os.environ: + return os.getenv(name) + return file_env.get(name) + + +api_key = _getenv("LLM_API_KEY") if api_key: litellm.api_key = api_key api_base = ( - os.getenv("LLM_API_BASE") - or os.getenv("OPENAI_API_BASE") - or os.getenv("LITELLM_BASE_URL") - or os.getenv("OLLAMA_API_BASE") + _getenv("LLM_API_BASE") + or _getenv("OPENAI_API_BASE") + or _getenv("LITELLM_BASE_URL") + or _getenv("OLLAMA_API_BASE") ) if 
class ChatBackend(Protocol):
    """Minimal interface for an LLM backend usable by MultiplexingLLM."""

    # Declared async to match how MultiplexingLLM awaits it; the previous
    # Protocol declared a sync method even though call sites awaited it.
    async def generate(self, *args: Any, **kwargs: Any) -> Any: ...


class MultiplexingLLM:
    """Primary/fallback wrapper around two chat backends.

    ``generate`` calls the primary backend; if it raises, a fallback is
    configured, and ``should_retry`` approves the exception, the fallback
    is tried. Otherwise the original exception propagates.
    """

    def __init__(
        self,
        primary: ChatBackend,
        fallback: ChatBackend | None = None,
        should_retry: Callable[[Exception], bool] | None = None,
    ) -> None:
        self.primary = primary
        self.fallback = fallback
        # Default policy: every exception is retryable on the fallback.
        self.should_retry = should_retry or (lambda exc: True)

    async def generate(self, *args: Any, **kwargs: Any) -> Any:
        """Generate via primary, falling back once on an approved failure."""
        try:
            return await self.primary.generate(*args, **kwargs)
        except Exception as exc:  # noqa: BLE001
            if self.fallback is not None and self.should_retry(exc):
                return await self.fallback.generate(*args, **kwargs)
            raise
def run_benchmark(name: str, fn: Callable[[], Any]) -> dict[str, Any]:
    """Time a single call to *fn* and return its name, duration, and result.

    duration_ms is wall-clock time (perf_counter) in milliseconds,
    rounded to two decimal places.
    """
    started = time.perf_counter()
    value = fn()
    elapsed_ms = (time.perf_counter() - started) * 1000.0
    return {"name": name, "duration_ms": round(elapsed_ms, 2), "result": value}
self.instances[0] + + async def mark_unhealthy(self, instance: Any) -> None: + async with self.lock: + self.health[id(instance)] = "unhealthy" + + async def get_health(self) -> dict[int, str]: + async with self.lock: + return dict(self.health) diff --git a/strix/telemetry/tracer.py b/strix/telemetry/tracer.py index 6da30d53..3529a2b1 100644 --- a/strix/telemetry/tracer.py +++ b/strix/telemetry/tracer.py @@ -1,3 +1,4 @@ +import json import logging from datetime import UTC, datetime from pathlib import Path @@ -46,6 +47,7 @@ def __init__(self, run_name: str | None = None): "end_time": None, "targets": [], "status": "running", + "max_iterations": None, } self._run_dir: Path | None = None self._next_execution_id = 1 @@ -74,6 +76,10 @@ def add_vulnerability_report( title: str, content: str, severity: str, + cvss_score: float | None = None, + references: list[str] | None = None, + fix_recommendation: str | None = None, + cwe: list[str] | None = None, ) -> str: report_id = f"vuln-{len(self.vulnerability_reports) + 1:04d}" @@ -83,6 +89,10 @@ def add_vulnerability_report( "content": content.strip(), "severity": severity.lower().strip(), "timestamp": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC"), + "cvss_score": cvss_score, + "references": references or [], + "fix_recommendation": fix_recommendation, + "cwe": cwe or [], } self.vulnerability_reports.append(report) @@ -197,11 +207,16 @@ def set_scan_config(self, config: dict[str, Any]) -> None: { "targets": config.get("targets", []), "user_instructions": config.get("user_instructions", ""), - "max_iterations": config.get("max_iterations", 200), + "max_iterations": config.get("max_iterations", 300), } ) self.get_run_dir() + def set_iteration_policy(self, policy: dict[str, Any]) -> None: + self.run_metadata["iteration_policy"] = policy + if "max_iterations" in policy: + self.run_metadata["max_iterations"] = policy["max_iterations"] + def save_run_data(self, mark_complete: bool = False) -> None: try: run_dir = 
self.get_run_dir() @@ -236,9 +251,20 @@ def save_run_data(self, mark_complete: bool = False) -> None: f.write(f"# {report['title']}\n\n") f.write(f"**ID:** {report['id']}\n") f.write(f"**Severity:** {report['severity'].upper()}\n") + if report.get("cvss_score") is not None: + f.write(f"**CVSS:** {report['cvss_score']}\n") + if report.get("cwe"): + f.write(f"**CWE:** {', '.join(report['cwe'])}\n") f.write(f"**Found:** {report['timestamp']}\n\n") f.write("## Description\n\n") f.write(f"{report['content']}\n") + if report.get("fix_recommendation"): + f.write("\n## Fix Recommendation\n\n") + f.write(f"{report['fix_recommendation']}\n") + if report.get("references"): + f.write("\n## References\n\n") + for ref in report["references"]: + f.write(f"- {ref}\n") self._saved_vuln_ids.add(report["id"]) if self.vulnerability_reports: @@ -252,7 +278,16 @@ def save_run_data(self, mark_complete: bool = False) -> None: with vuln_csv_file.open("w", encoding="utf-8", newline="") as f: import csv - fieldnames = ["id", "title", "severity", "timestamp", "file"] + fieldnames = [ + "id", + "title", + "severity", + "timestamp", + "cvss", + "cwe", + "references", + "file", + ] writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() @@ -263,10 +298,38 @@ def save_run_data(self, mark_complete: bool = False) -> None: "title": report["title"], "severity": report["severity"].upper(), "timestamp": report["timestamp"], + "cvss": report.get("cvss_score"), + "cwe": ",".join(report.get("cwe", [])), + "references": ",".join(report.get("references", [])), "file": f"vulnerabilities/{report['id']}.md", } ) + vuln_jsonl_file = run_dir / "vulnerabilities.jsonl" + with vuln_jsonl_file.open("w", encoding="utf-8") as f: + for report in sorted_reports: + json_record = { + "id": report["id"], + "title": report["title"], + "severity": report["severity"], + "timestamp": report["timestamp"], + "content": report["content"], + "cvss_score": report.get("cvss_score"), + "cwe": report.get("cwe", []), + 
"references": report.get("references", []), + "fix_recommendation": report.get("fix_recommendation"), + "file": f"vulnerabilities/{report['id']}.md", + "run_id": self.run_id, + "run_name": self.run_name, + } + f.write(json.dumps(json_record)) + f.write("\n") + + sarif_file = run_dir / "vulnerabilities.sarif.json" + sarif_payload = self._build_sarif_report(sorted_reports) + with sarif_file.open("w", encoding="utf-8") as f: + json.dump(sarif_payload, f, indent=2) + if new_reports: logger.info( f"Saved {len(new_reports)} new vulnerability report(s) to: {vuln_dir}" @@ -333,5 +396,75 @@ def get_total_llm_stats(self) -> dict[str, Any]: "total_tokens": total_stats["input_tokens"] + total_stats["output_tokens"], } + def _build_sarif_report(self, reports: list[dict[str, Any]]) -> dict[str, Any]: + severity_rules = { + "critical": {"rule_id": "STRIX.CRITICAL", "level": "error", "name": "Critical"}, + "high": {"rule_id": "STRIX.HIGH", "level": "error", "name": "High"}, + "medium": {"rule_id": "STRIX.MEDIUM", "level": "warning", "name": "Medium"}, + "low": {"rule_id": "STRIX.LOW", "level": "note", "name": "Low"}, + "info": {"rule_id": "STRIX.INFO", "level": "note", "name": "Informational"}, + } + + rules = [ + { + "id": rule["rule_id"], + "name": f"{rule['name']} Severity", + "shortDescription": {"text": f"{rule['name']} severity vulnerability"}, + "defaultConfiguration": {"level": rule["level"]}, + } + for rule in severity_rules.values() + ] + + results = [] + for report in reports: + severity_key = report.get("severity", "medium").lower().strip() + rule = severity_rules.get(severity_key, severity_rules["medium"]) + result = { + "ruleId": rule["rule_id"], + "level": rule["level"], + "message": {"text": report.get("title", "Strix vulnerability")}, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": f"vulnerabilities/{report.get('id', 'unknown')}.md" + } + } + } + ], + "properties": { + "id": report.get("id"), + "severity": severity_key, + "timestamp": 
report.get("timestamp"), + "content": report.get("content"), + "cvss_score": report.get("cvss_score"), + "cwe": report.get("cwe", []), + "references": report.get("references", []), + "fix_recommendation": report.get("fix_recommendation"), + "runId": self.run_id, + "runName": self.run_name or "", + }, + "partialFingerprints": {"strix/vulnerabilityId": report.get("id", "")}, + } + results.append(result) + + sarif_payload = { + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "version": "2.1.0", + "runs": [ + { + "tool": { + "driver": { + "name": "Strix", + "rules": rules, + } + }, + "results": results, + } + ], + } + + return sarif_payload + def cleanup(self) -> None: self.save_run_data(mark_complete=True) diff --git a/strix/tools/__init__.py b/strix/tools/__init__.py index 8d5f896b..5843253f 100644 --- a/strix/tools/__init__.py +++ b/strix/tools/__init__.py @@ -33,6 +33,9 @@ from .proxy import * # noqa: F403 from .python import * # noqa: F403 from .reporting import * # noqa: F403 + from .cache import * # noqa: F403 + from .api_probe import * # noqa: F403 + from .sast import * # noqa: F403 from .terminal import * # noqa: F403 from .thinking import * # noqa: F403 @@ -44,6 +47,8 @@ from .notes import * # noqa: F403 from .proxy import * # noqa: F403 from .python import * # noqa: F403 + from .api_probe import * # noqa: F403 + from .sast import * # noqa: F403 from .terminal import * # noqa: F403 __all__ = [ diff --git a/strix/tools/api_probe/__init__.py b/strix/tools/api_probe/__init__.py new file mode 100644 index 00000000..fae9da73 --- /dev/null +++ b/strix/tools/api_probe/__init__.py @@ -0,0 +1,3 @@ +from .api_probe_actions import load_openapi_spec, suggest_api_fuzz_cases + +__all__ = ["load_openapi_spec", "suggest_api_fuzz_cases"] diff --git a/strix/tools/api_probe/api_probe_actions.py b/strix/tools/api_probe/api_probe_actions.py new file mode 100644 index 00000000..03da857b --- /dev/null +++ b/strix/tools/api_probe/api_probe_actions.py @@ -0,0 +1,86 @@ 
+from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from strix.tools.registry import register_tool + + +def _load_spec(path: Path) -> dict[str, Any]: + if not path.exists(): + raise FileNotFoundError(f"Spec file not found: {path}") + + if path.suffix.lower() in {".yaml", ".yml"}: + try: + import yaml # type: ignore + except ImportError as exc: # pragma: no cover - optional dep + raise RuntimeError("PyYAML required for YAML specs; install pyyaml") from exc + return yaml.safe_load(path.read_text(encoding="utf-8")) or {} + + return json.loads(path.read_text(encoding="utf-8")) + + +def _extract_type(param: dict[str, Any]) -> str: + schema = param.get("schema", {}) + if "type" in schema: + return str(schema["type"]) + if "$ref" in schema: + return "object_ref" + return "unknown" + + +@register_tool(sandbox_execution=False) +def load_openapi_spec(spec_path: str) -> dict[str, Any]: + spec = _load_spec(Path(spec_path)) + paths = spec.get("paths", {}) + endpoints: list[dict[str, Any]] = [] + + for path, methods in paths.items(): + for method, details in methods.items(): + params = [] + for p in details.get("parameters", []): + params.append( + { + "name": p.get("name"), + "in": p.get("in"), + "required": p.get("required", False), + "type": _extract_type(p), + } + ) + endpoints.append( + { + "path": path, + "method": method.upper(), + "summary": details.get("summary", ""), + "params": params, + } + ) + + return {"success": True, "endpoints": endpoints} + + +@register_tool(sandbox_execution=False) +def suggest_api_fuzz_cases(endpoints: list[dict[str, Any]]) -> dict[str, Any]: + fuzz_strings = [ + "' OR '1'='1", + "\"; DROP TABLE users; --", + "../../etc/passwd", + "${{7*7}}", + "", + ] + + suggestions = [] + for ep in endpoints: + param_payloads = [] + for param in ep.get("params", []): + param_payloads.append({"name": param.get("name"), "payload": fuzz_strings[0]}) + suggestions.append( + { + "path": ep.get("path"), + 
"method": ep.get("method"), + "payloads": param_payloads or [{"payload": fuzz_strings[1]}], + } + ) + + return {"success": True, "suggestions": suggestions} diff --git a/strix/tools/api_probe/api_probe_actions_schema.xml b/strix/tools/api_probe/api_probe_actions_schema.xml new file mode 100644 index 00000000..a7798ca0 --- /dev/null +++ b/strix/tools/api_probe/api_probe_actions_schema.xml @@ -0,0 +1,24 @@ + + + Load an OpenAPI/Swagger specification (JSON or YAML) and extract endpoints, methods, and basic parameter info for planning probes. + + + Path to the OpenAPI/Swagger file (.json/.yaml/.yml). + + + + Endpoints array with path, method, summary, and parameters (name/location/required/type). + + + + Generate lightweight fuzzing suggestions for endpoints based on their parameters (e.g., injection strings). + + + Endpoints as returned by load_openapi_spec. + + + + Fuzz suggestions per endpoint with placeholder payloads. + + + diff --git a/strix/tools/browser/browser_actions.py b/strix/tools/browser/browser_actions.py index ca7a26a1..c1b241b9 100644 --- a/strix/tools/browser/browser_actions.py +++ b/strix/tools/browser/browser_actions.py @@ -24,6 +24,8 @@ "press_key", "save_pdf", "get_console_logs", + "get_network_events", + "capture_screenshot_diff", "view_source", "close", "list_tabs", @@ -156,6 +158,7 @@ def _handle_utility_actions( file_path: str | None = None, tab_id: str | None = None, clear: bool = False, + limit: int | None = None, ) -> dict[str, Any]: if action == "wait": _validate_duration(action, duration) @@ -171,6 +174,10 @@ def _handle_utility_actions( return manager.save_pdf(file_path, tab_id) if action == "get_console_logs": return manager.get_console_logs(tab_id, clear) + if action == "get_network_events": + return manager.get_network_events(tab_id, limit or 50, clear) + if action == "capture_screenshot_diff": + return manager.capture_screenshot_diff(tab_id) if action == "view_source": return manager.view_source(tab_id) if action == "close": @@ 
-190,6 +197,7 @@ def browser_action( key: str | None = None, file_path: str | None = None, clear: bool = False, + limit: int | None = None, ) -> dict[str, Any]: manager = get_browser_tab_manager() @@ -210,6 +218,8 @@ def browser_action( "execute_js", "save_pdf", "get_console_logs", + "get_network_events", + "capture_screenshot_diff", "view_source", "close", } @@ -222,7 +232,7 @@ def browser_action( return _handle_tab_actions(manager, action, url, tab_id) if action in utility_actions: return _handle_utility_actions( - manager, action, duration, js_code, file_path, tab_id, clear + manager, action, duration, js_code, file_path, tab_id, clear, limit ) _raise_unknown_action(action) diff --git a/strix/tools/browser/browser_actions_schema.xml b/strix/tools/browser/browser_actions_schema.xml index b6fdfc64..e93c9590 100644 --- a/strix/tools/browser/browser_actions_schema.xml +++ b/strix/tools/browser/browser_actions_schema.xml @@ -34,9 +34,12 @@ For 'get_console_logs' action: whether to clear console logs after retrieving them. Default is False (keep logs). + + Optional limit when fetching network events (default 50; max stored 200). + - Response containing: - screenshot: Base64 encoded PNG of the current page state - url: Current page URL - title: Current page title - viewport: Current browser viewport dimensions - tab_id: ID of the current active tab - all_tabs: Dict of all open tab IDs and their URLs - message: Status message about the action performed - js_result: Result of JavaScript execution (for execute_js action) - pdf_saved: File path of saved PDF (for save_pdf action) - console_logs: Array of console messages (for get_console_logs action) Limited to 50KB total and 200 most recent logs. Individual messages truncated at 1KB. - page_source: HTML source code (for view_source action) Large pages are truncated to 100KB (keeping beginning and end sections). 
+ Response containing: - screenshot: Base64 encoded PNG of the current page state - screenshot_changed: Whether the latest screenshot differs from prior capture - url: Current page URL - title: Current page title - viewport: Current browser viewport dimensions - tab_id: ID of the current active tab - all_tabs: Dict of all open tab IDs and their URLs - message: Status message about the action performed - js_result: Result of JavaScript execution (for execute_js action) - pdf_saved: File path of saved PDF (for save_pdf action) - console_logs: Array of console messages (for get_console_logs action) Limited to 50KB total and 200 most recent logs. Individual messages truncated at 1KB. - network_events: Recent requests/responses with status, method, duration_ms (for get_network_events action) - page_source: HTML source code (for view_source action) Large pages are truncated to 100KB (keeping beginning and end sections). Important usage rules: diff --git a/strix/tools/browser/browser_instance.py b/strix/tools/browser/browser_instance.py index 3e756f67..30ec9a3b 100644 --- a/strix/tools/browser/browser_instance.py +++ b/strix/tools/browser/browser_instance.py @@ -30,6 +30,9 @@ def __init__(self) -> None: self._next_tab_id = 1 self.console_logs: dict[str, list[dict[str, Any]]] = {} + self.network_events: dict[str, list[dict[str, Any]]] = {} + self._request_start: dict[int, float] = {} + self._last_screenshots: dict[str, bytes] = {} self._loop: asyncio.AbstractEventLoop | None = None self._loop_thread: threading.Thread | None = None @@ -77,6 +80,32 @@ def handle_console(msg: Any) -> None: page.on("console", handle_console) + async def _setup_network_logging(self, page: Page, tab_id: str) -> None: + self.network_events[tab_id] = [] + + def handle_request(request: Any) -> None: + self._request_start[id(request)] = asyncio.get_event_loop().time() + + def handle_response(response: Any) -> None: + start = self._request_start.pop(id(response.request), None) + duration_ms = None + 
if start is not None: + duration_ms = round((asyncio.get_event_loop().time() - start) * 1000, 2) + + event = { + "url": response.url, + "method": response.request.method, + "status": response.status, + "resource_type": response.request.resource_type, + "duration_ms": duration_ms, + } + events = self.network_events.get(tab_id, []) + events.append(event) + self.network_events[tab_id] = events[-200:] + + page.on("request", handle_request) + page.on("response", handle_response) + async def _launch_browser(self, url: str | None = None) -> dict[str, Any]: self.playwright = await async_playwright().start() @@ -106,6 +135,7 @@ async def _launch_browser(self, url: str | None = None) -> dict[str, Any]: self.current_page_id = tab_id await self._setup_console_logging(page, tab_id) + await self._setup_network_logging(page, tab_id) if url: await page.goto(url, wait_until="domcontentloaded") @@ -125,6 +155,9 @@ async def _get_page_state(self, tab_id: str | None = None) -> dict[str, Any]: screenshot_bytes = await page.screenshot(type="png", full_page=False) screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8") + previous = self._last_screenshots.get(tab_id) + screenshot_changed = previous is None or previous != screenshot_bytes + self._last_screenshots[tab_id] = screenshot_bytes url = page.url title = await page.title() @@ -144,6 +177,7 @@ async def _get_page_state(self, tab_id: str | None = None) -> dict[str, Any]: "viewport": viewport, "tab_id": tab_id, "all_tabs": all_tabs, + "screenshot_changed": screenshot_changed, } def launch(self, url: str | None = None) -> dict[str, Any]: @@ -275,6 +309,7 @@ async def _new_tab(self, url: str | None = None) -> dict[str, Any]: self.current_page_id = tab_id await self._setup_console_logging(page, tab_id) + await self._setup_network_logging(page, tab_id) if url: await page.goto(url, wait_until="domcontentloaded") @@ -401,6 +436,42 @@ async def _get_console_logs( state["console_logs"] = logs return state + def get_network_events( 
+ self, tab_id: str | None = None, limit: int = 50, clear: bool = False + ) -> dict[str, Any]: + with self._execution_lock: + return self._run_async(self._get_network_events(tab_id, limit, clear)) + + async def _get_network_events( + self, tab_id: str | None = None, limit: int = 50, clear: bool = False + ) -> dict[str, Any]: + if not tab_id: + tab_id = self.current_page_id + + if not tab_id or tab_id not in self.pages: + raise ValueError(f"Tab '{tab_id}' not found") + + events = self.network_events.get(tab_id, []) + limited = events[-limit:] + if clear: + self.network_events[tab_id] = [] + + state = await self._get_page_state(tab_id) + state["network_events"] = limited + return state + + def capture_screenshot_diff(self, tab_id: str | None = None) -> dict[str, Any]: + with self._execution_lock: + return self._run_async(self._capture_screenshot_diff(tab_id)) + + async def _capture_screenshot_diff(self, tab_id: str | None = None) -> dict[str, Any]: + state = await self._get_page_state(tab_id) + return { + "tab_id": state["tab_id"], + "screenshot": state["screenshot"], + "screenshot_changed": state.get("screenshot_changed", False), + } + def view_source(self, tab_id: str | None = None) -> dict[str, Any]: with self._execution_lock: return self._run_async(self._view_source(tab_id)) diff --git a/strix/tools/browser/tab_manager.py b/strix/tools/browser/tab_manager.py index 3b4b674f..42d8a2e1 100644 --- a/strix/tools/browser/tab_manager.py +++ b/strix/tools/browser/tab_manager.py @@ -248,6 +248,39 @@ def get_console_logs(self, tab_id: str | None = None, clear: bool = False) -> di else: return result + def get_network_events( + self, tab_id: str | None = None, limit: int = 50, clear: bool = False + ) -> dict[str, Any]: + with self._lock: + if self.browser_instance is None: + raise ValueError("Browser not launched") + + try: + result = self.browser_instance.get_network_events(tab_id, limit, clear) + result["message"] = ( + f"Network events retrieved for tab 
{result.get('tab_id', 'current')}" + ) + except (OSError, ValueError, RuntimeError) as e: + raise RuntimeError(f"Failed to get network events: {e}") from e + else: + return result + + def capture_screenshot_diff(self, tab_id: str | None = None) -> dict[str, Any]: + with self._lock: + if self.browser_instance is None: + raise ValueError("Browser not launched") + + try: + result = self.browser_instance.capture_screenshot_diff(tab_id) + result["message"] = ( + f"Screenshot captured for tab {result.get('tab_id', 'current')} " + f"({'changed' if result.get('screenshot_changed') else 'unchanged'})" + ) + except (OSError, ValueError, RuntimeError) as e: + raise RuntimeError(f"Failed to capture screenshot diff: {e}") from e + else: + return result + def view_source(self, tab_id: str | None = None) -> dict[str, Any]: with self._lock: if self.browser_instance is None: diff --git a/strix/tools/cache/__init__.py b/strix/tools/cache/__init__.py new file mode 100644 index 00000000..66c23474 --- /dev/null +++ b/strix/tools/cache/__init__.py @@ -0,0 +1,3 @@ +from .cache_actions import cache_result, get_cached_result + +__all__ = ["cache_result", "get_cached_result"] diff --git a/strix/tools/cache/cache_actions.py b/strix/tools/cache/cache_actions.py new file mode 100644 index 00000000..6b514435 --- /dev/null +++ b/strix/tools/cache/cache_actions.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from strix.tools.registry import register_tool + + +CACHE_DIR = Path.cwd() / "strix_cache" + + +def _cache_path(target: str, action: str) -> Path: + safe_target = target.replace("/", "_").replace(":", "_") + safe_action = action.replace("/", "_") + return CACHE_DIR / f"{safe_target}__{safe_action}.json" + + +@register_tool(sandbox_execution=False) +def cache_result(target: str, action: str, result: str) -> dict[str, Any]: + CACHE_DIR.mkdir(exist_ok=True) + path = _cache_path(target, action) + path.write_text(result, 
encoding="utf-8") + return {"success": True, "cached_path": str(path)} + + +@register_tool(sandbox_execution=False) +def get_cached_result(target: str, action: str) -> dict[str, Any]: + path = _cache_path(target, action) + if not path.exists(): + return {"success": False, "cached": False} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + data = path.read_text(encoding="utf-8") + return {"success": True, "cached": True, "result": data} diff --git a/strix/tools/cache/cache_actions_schema.xml b/strix/tools/cache/cache_actions_schema.xml new file mode 100644 index 00000000..9a8a9bb8 --- /dev/null +++ b/strix/tools/cache/cache_actions_schema.xml @@ -0,0 +1,33 @@ + + + Store a tool result keyed by target and action fingerprint. + + + Target identifier (e.g., URL or repo). + + + Action name or tool invocation fingerprint. + + + Serialized result payload to store. + + + + Confirmation of cache write. + + + + Retrieve a cached result by target/action fingerprint. + + + Target identifier. + + + Action name or tool invocation fingerprint. + + + + Cached result payload if present. 
+ + + diff --git a/strix/tools/reporting/reporting_actions.py b/strix/tools/reporting/reporting_actions.py index dd98d6db..772aaaff 100644 --- a/strix/tools/reporting/reporting_actions.py +++ b/strix/tools/reporting/reporting_actions.py @@ -8,6 +8,10 @@ def create_vulnerability_report( title: str, content: str, severity: str, + cvss_score: float | None = None, + references: list[str] | None = None, + fix_recommendation: str | None = None, + cwe: list[str] | None = None, ) -> dict[str, Any]: validation_error = None if not title or not title.strip(): @@ -35,6 +39,10 @@ def create_vulnerability_report( title=title, content=content, severity=severity, + cvss_score=cvss_score, + references=references, + fix_recommendation=fix_recommendation, + cwe=cwe, ) return { @@ -42,6 +50,7 @@ def create_vulnerability_report( "message": f"Vulnerability report '{title}' created successfully", "report_id": report_id, "severity": severity.lower(), + "cvss_score": cvss_score, } import logging diff --git a/strix/tools/reporting/reporting_actions_schema.xml b/strix/tools/reporting/reporting_actions_schema.xml index 2e47d60d..5891e169 100644 --- a/strix/tools/reporting/reporting_actions_schema.xml +++ b/strix/tools/reporting/reporting_actions_schema.xml @@ -22,6 +22,18 @@ DO NOT USE: Severity level: critical, high, medium, low, or info + + Optional CVSS base score estimate (0.0 - 10.0) + + + Optional list of references (CWE, OWASP, docs) relevant to the finding + + + Optional concise fix recommendation to include in the report + + + Optional list of CWE identifiers + Response containing success status and message diff --git a/strix/tools/sast/__init__.py b/strix/tools/sast/__init__.py new file mode 100644 index 00000000..530b7bee --- /dev/null +++ b/strix/tools/sast/__init__.py @@ -0,0 +1,3 @@ +from .sast_actions import run_sast_scan, scan_dependencies + +__all__ = ["run_sast_scan", "scan_dependencies"] diff --git a/strix/tools/sast/sast_actions.py b/strix/tools/sast/sast_actions.py new 
file mode 100644 index 00000000..b42254d9 --- /dev/null +++ b/strix/tools/sast/sast_actions.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +from strix.tools.registry import register_tool + + +_DANGEROUS_PATTERNS = [ + { + "id": "PY001", + "regex": r"\beval\(", + "severity": "high", + "message": "Use of eval() can lead to code execution", + }, + { + "id": "PY002", + "regex": r"\bexec\(", + "severity": "high", + "message": "Use of exec() can lead to code execution", + }, + { + "id": "PY003", + "regex": r"subprocess\.(run|Popen)\([^)]*shell\s*=\s*True", + "severity": "high", + "message": "subprocess with shell=True can lead to command injection", + }, + { + "id": "PY004", + "regex": r"random\.(randrange|randint|random)\(", + "severity": "medium", + "message": "Insecure randomness; prefer secrets module for security tokens", + }, +] + +_SKIP_DIRS = {".git", ".venv", "venv", "__pycache__", "node_modules", ".tox", ".ruff_cache"} + + +def _iter_code_files(base: Path, max_files: int) -> list[Path]: + files: list[Path] = [] + for path in base.rglob("*.py"): + if any(part in _SKIP_DIRS for part in path.parts): + continue + files.append(path) + if len(files) >= max_files: + break + return files + + +@register_tool(sandbox_execution=False) +def run_sast_scan(target_path: str | None = None, max_files: int = 200) -> dict[str, Any]: + base = Path(target_path or ".").resolve() + findings: list[dict[str, Any]] = [] + + for file_path in _iter_code_files(base, max_files): + try: + text = file_path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + + lines = text.splitlines() + for idx, line in enumerate(lines, start=1): + for pattern in _DANGEROUS_PATTERNS: + if re.search(pattern["regex"], line): + findings.append( + { + "file": str(file_path), + "line": idx, + "rule_id": pattern["id"], + "severity": pattern["severity"], + "message": pattern["message"], + "snippet": line.strip(), + } + 
) + return {"success": True, "findings": {"static": findings}} + + +@register_tool(sandbox_execution=False) +def scan_dependencies(target_path: str | None = None) -> dict[str, Any]: + base = Path(target_path or ".").resolve() + findings: list[dict[str, Any]] = [] + + req_file = base / "requirements.txt" + if req_file.exists(): + for line in req_file.read_text(encoding="utf-8", errors="ignore").splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + if "==" not in stripped and ">=" not in stripped and "<=" not in stripped: + findings.append( + { + "package": stripped, + "spec": "unpinned", + "severity": "medium", + "reason": "Dependency is not pinned; prefer exact versions", + } + ) + + pyproject = base / "pyproject.toml" + if pyproject.exists(): + try: + import tomllib + + data = tomllib.loads(pyproject.read_text(encoding="utf-8")) + deps = data.get("tool", {}).get("poetry", {}).get("dependencies", {}) + for pkg, spec in deps.items(): + if pkg == "python": + continue + if spec == "*" or spec == "^0.0.0": + findings.append( + { + "package": pkg, + "spec": str(spec), + "severity": "medium", + "reason": "Wildcard dependency version detected", + } + ) + except Exception: + findings.append( + { + "package": "unknown", + "spec": "parse_error", + "severity": "low", + "reason": "Unable to parse pyproject.toml for dependency checks", + } + ) + + return {"success": True, "findings": {"dependencies": findings}} diff --git a/strix/tools/sast/sast_actions_schema.xml b/strix/tools/sast/sast_actions_schema.xml new file mode 100644 index 00000000..df64fd13 --- /dev/null +++ b/strix/tools/sast/sast_actions_schema.xml @@ -0,0 +1,27 @@ + + + Perform a lightweight static scan of Python files to flag risky constructs (eval/exec, shell=True, weak randomness) for quick triage. + + + Path to scan (defaults to current working directory). + + + Optional cap on number of files to scan (default 200). 
+ + + + Findings grouped under 'static' with file, line, rule_id, severity, and message. + + + + Collect Python dependencies from requirements.txt or pyproject.toml and flag unpinned or wildcard versions. + + + Path to scan (defaults to current working directory). + + + + Dependency findings under 'dependencies' with package, spec, severity, and reason. + + + diff --git a/tests/agents/test_graph_builder.py b/tests/agents/test_graph_builder.py new file mode 100644 index 00000000..2bbf8097 --- /dev/null +++ b/tests/agents/test_graph_builder.py @@ -0,0 +1,61 @@ +import json +from pathlib import Path + +import pytest + +from strix.agents.graph_builder import ( + AgentGraphSpec, + GraphBuilderError, + load_graph_spec, + parse_graph_spec, +) + + +def test_parse_graph_spec_validates_and_returns_root() -> None: + raw = { + "agents": [ + {"id": "root", "name": "Root", "task": "Root task"}, + {"id": "child", "name": "Child", "task": "Child task", "parent_id": "root"}, + ] + } + + spec = parse_graph_spec(raw) + + assert isinstance(spec, AgentGraphSpec) + assert spec.root.id == "root" + graph_dict = spec.as_graph_dict() + assert len(graph_dict["nodes"]) == 2 + assert graph_dict["edges"] == [{"from": "root", "to": "child", "type": "delegation"}] + + +def test_parse_graph_spec_rejects_invalid_parent() -> None: + raw = {"agents": [{"id": "orphan", "name": "Orphan", "task": "Task", "parent_id": "missing"}]} + with pytest.raises(GraphBuilderError): + parse_graph_spec(raw) + + +def test_load_graph_spec_reads_json(tmp_path: Path) -> None: + path = tmp_path / "graph.json" + path.write_text( + json.dumps( + { + "agents": [ + { + "id": "root", + "name": "Root", + "task": "Root task", + "prompt_modules": ["root_agent"], + "max_iterations": 123, + } + ] + } + ), + encoding="utf-8", + ) + + spec = load_graph_spec(path) + configs = spec.build_agent_configs({"llm_prompt_modules": ["default"]}) + + assert configs[0]["agent_id"] == "root" + assert configs[0]["max_iterations"] == 123 + assert 
configs[0]["llm_prompt_modules"] == ["root_agent"] diff --git a/tests/agents/test_iteration_policy.py b/tests/agents/test_iteration_policy.py new file mode 100644 index 00000000..47c21cad --- /dev/null +++ b/tests/agents/test_iteration_policy.py @@ -0,0 +1,23 @@ +from strix.agents.iteration_policy import calculate_iteration_budget + + +def test_calculate_iteration_budget_scales_with_targets() -> None: + targets = [ + {"type": "repository", "details": {}}, + {"type": "web_application", "details": {}}, + {"type": "local_code", "details": {}}, + ] + + result = calculate_iteration_budget(targets, llm_timeout=700, base=300) + + assert result["max_iterations"] >= 300 + assert result["inputs"]["target_weight"] == 5 + assert result["inputs"]["latency_adjustment"] > 0 + + +def test_calculate_iteration_budget_bounds() -> None: + result = calculate_iteration_budget([], llm_timeout=None, base=50) + assert result["max_iterations"] >= 180 + + result = calculate_iteration_budget([{"type": "repository"}] * 20, llm_timeout=2000, base=500) + assert result["max_iterations"] <= 600 diff --git a/tests/agents/test_state_persistence.py b/tests/agents/test_state_persistence.py new file mode 100644 index 00000000..c9ca9b19 --- /dev/null +++ b/tests/agents/test_state_persistence.py @@ -0,0 +1,17 @@ +from pathlib import Path + +from strix.agents.state import AgentState + + +def test_state_save_and_load_round_trip(tmp_path: Path) -> None: + state = AgentState(agent_name="Tester", task="Do work", iteration=5) + state.add_message("user", "hello") + path = tmp_path / "state.json" + + saved_path = state.save_to_path(path) + loaded = AgentState.load_from_path(saved_path) + + assert loaded.agent_name == "Tester" + assert loaded.task == "Do work" + assert loaded.messages[-1]["content"] == "hello" + assert loaded.iteration == 5 diff --git a/tests/llm/test_router.py b/tests/llm/test_router.py new file mode 100644 index 00000000..50ea1850 --- /dev/null +++ b/tests/llm/test_router.py @@ -0,0 +1,39 @@ 
+import asyncio + +import pytest + +from strix.llm.router import MultiplexingLLM + + +class DummyLLM: + def __init__(self, should_fail: bool = False): + self.should_fail = should_fail + self.calls = 0 + + async def generate(self, *args, **kwargs): # type: ignore[override] + self.calls += 1 + if self.should_fail: + raise RuntimeError("failure") + return "ok" + + +@pytest.mark.asyncio +async def test_multiplexing_llm_fallbacks() -> None: + primary = DummyLLM(should_fail=True) + fallback = DummyLLM() + router = MultiplexingLLM(primary, fallback) + + result = await router.generate("msg") + + assert result == "ok" + assert primary.calls == 1 + assert fallback.calls == 1 + + +@pytest.mark.asyncio +async def test_multiplexing_llm_raises_without_fallback() -> None: + primary = DummyLLM(should_fail=True) + router = MultiplexingLLM(primary, None) + + with pytest.raises(RuntimeError): + await router.generate("msg") diff --git a/tests/prompts/test_auth_playbook_prompt.py b/tests/prompts/test_auth_playbook_prompt.py new file mode 100644 index 00000000..b19bbed2 --- /dev/null +++ b/tests/prompts/test_auth_playbook_prompt.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader, select_autoescape + +from strix.prompts import get_all_module_names, load_prompt_modules + + +def _jinja_env() -> Environment: + prompts_dir = Path(__file__).parents[2] / "strix" / "prompts" + return Environment( + loader=FileSystemLoader(prompts_dir), + autoescape=select_autoescape(enabled_extensions=(), default_for_string=False), + ) + + +def test_auth_playbook_module_available() -> None: + modules = get_all_module_names() + assert "oidc_saml_sso" in modules + + +def test_auth_playbook_renders() -> None: + env = _jinja_env() + content = load_prompt_modules(["oidc_saml_sso"], env) + assert "oidc_saml_sso" in content + assert "OIDC" in content["oidc_saml_sso"] diff --git a/tests/runtime/test_benchmark.py b/tests/runtime/test_benchmark.py new file mode 100644 index 
00000000..834cb807 --- /dev/null +++ b/tests/runtime/test_benchmark.py @@ -0,0 +1,11 @@ +from strix.runtime.benchmark import run_benchmark + + +def test_run_benchmark_records_duration() -> None: + def sample(): + return "ok" + + result = run_benchmark("sample", sample) + assert result["name"] == "sample" + assert result["duration_ms"] >= 0 + assert result["result"] == "ok" diff --git a/tests/runtime/test_tool_pool.py b/tests/runtime/test_tool_pool.py new file mode 100644 index 00000000..93d316ad --- /dev/null +++ b/tests/runtime/test_tool_pool.py @@ -0,0 +1,20 @@ +import asyncio + +import pytest + +from strix.runtime.tool_pool import ToolServerPool + + +def make_stub(): + return object() + + +@pytest.mark.asyncio +async def test_tool_pool_spawns_and_reuses() -> None: + pool = ToolServerPool(make_stub, max_instances=1) + inst1 = await pool.get_instance() + inst2 = await pool.get_instance() + assert inst1 is inst2 + await pool.mark_unhealthy(inst1) + health = await pool.get_health() + assert health[id(inst1)] == "unhealthy" diff --git a/tests/tools/test_api_probe_tool.py b/tests/tools/test_api_probe_tool.py new file mode 100644 index 00000000..c9abf6d8 --- /dev/null +++ b/tests/tools/test_api_probe_tool.py @@ -0,0 +1,36 @@ +from pathlib import Path + +from strix.tools.api_probe.api_probe_actions import load_openapi_spec, suggest_api_fuzz_cases + + +def test_load_openapi_spec_parses_endpoints(tmp_path: Path) -> None: + spec = { + "openapi": "3.0.0", + "paths": { + "/users": { + "get": { + "summary": "List users", + "parameters": [ + {"name": "limit", "in": "query", "required": False, "schema": {"type": "integer"}} + ], + } + } + }, + } + spec_path = tmp_path / "spec.json" + spec_path.write_text(__import__("json").dumps(spec), encoding="utf-8") + + result = load_openapi_spec(str(spec_path)) + endpoints = result["endpoints"] + + assert endpoints[0]["path"] == "/users" + assert endpoints[0]["method"] == "GET" + assert endpoints[0]["params"][0]["name"] == "limit" + + +def 
test_suggest_api_fuzz_cases_generates_payloads() -> None: + endpoints = [{"path": "/users", "method": "GET", "params": [{"name": "id"}]}] + + result = suggest_api_fuzz_cases(endpoints) + + assert result["suggestions"][0]["payloads"][0]["name"] == "id" diff --git a/tests/tools/test_reporting_enrichment.py b/tests/tools/test_reporting_enrichment.py new file mode 100644 index 00000000..23b71413 --- /dev/null +++ b/tests/tools/test_reporting_enrichment.py @@ -0,0 +1,18 @@ +from strix.telemetry.tracer import Tracer + + +def test_report_includes_cvss_and_refs() -> None: + tracer = Tracer("test-run") + tracer.add_vulnerability_report( + title="Test vuln", + content="Issue details", + severity="high", + cvss_score=7.5, + references=["CWE-79", "OWASP-A01"], + fix_recommendation="Sanitize inputs", + cwe=["CWE-79"], + ) + + report = tracer.vulnerability_reports[0] + assert report["cvss_score"] == 7.5 + assert "CWE-79" in report["references"] diff --git a/tests/tools/test_sast_tool.py b/tests/tools/test_sast_tool.py new file mode 100644 index 00000000..9e1ebf46 --- /dev/null +++ b/tests/tools/test_sast_tool.py @@ -0,0 +1,44 @@ +from pathlib import Path + +from strix.tools.sast.sast_actions import run_sast_scan, scan_dependencies + + +def test_run_sast_scan_flags_eval(tmp_path: Path) -> None: + suspicious = tmp_path / "app.py" + suspicious.write_text("def run():\n return eval('1+1')\n", encoding="utf-8") + + result = run_sast_scan(str(tmp_path)) + + findings = result["findings"]["static"] + assert any(f["rule_id"] == "PY001" for f in findings) + assert findings[0]["file"].endswith("app.py") + + +def test_scan_dependencies_flags_unpinned_requirement(tmp_path: Path) -> None: + req = tmp_path / "requirements.txt" + req.write_text("flask\nrequests>=2.0.0\n", encoding="utf-8") + + result = scan_dependencies(str(tmp_path)) + + findings = result["findings"]["dependencies"] + assert any(f["package"] == "flask" and f["spec"] == "unpinned" for f in findings) + + +def 
test_scan_dependencies_flags_wildcard_pyproject(tmp_path: Path) -> None: + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + """ +[tool.poetry] +name = "sample" +version = "0.1.0" + +[tool.poetry.dependencies] +python = "^3.12" +flask = "*" +""", + encoding="utf-8", + ) + + result = scan_dependencies(str(tmp_path)) + findings = result["findings"]["dependencies"] + assert any(f["package"] == "flask" for f in findings)