diff --git a/skills/auto/SKILL.md b/skills/auto/SKILL.md index 2f2e8809e..d9c0e58d1 100644 --- a/skills/auto/SKILL.md +++ b/skills/auto/SKILL.md @@ -9,6 +9,8 @@ mcp_args: max_interview_rounds: "$max_interview_rounds" max_repair_rounds: "$max_repair_rounds" skip_run: "$skip_run" + driver: "$driver" + brake: "$brake" --- # /ouroboros:auto @@ -30,6 +32,7 @@ is unavailable. A manual fallback is not an `ooo auto` run. ooo auto "Build a local-first habit tracker CLI" ooo auto --resume auto_abc123 ooo auto "Build a local-first habit tracker CLI" --skip-run +ooo auto "Build a local-first habit tracker CLI" --driver hermes --brake on /ouroboros:auto "Build a local-first habit tracker CLI" ``` @@ -42,3 +45,10 @@ ooo auto "Build a local-first habit tracker CLI" --skip-run 5. Starts execution only after A-grade. The pipeline must not hang indefinitely: all loops are bounded and timeout failures return a resumable `auto_session_id`. Resume with `ooo auto --resume <auto_session_id>`. Use `--skip-run` to stop after the A-grade Seed. The CLI-only `--show-ledger` flag prints assumptions/non-goals; MCP skill responses already include the same ledger summary when available. + +When invoked through the interactive CLI without `--driver` or a configured +default driver, `ooo auto` asks whether to use a selected interview driver if +one of the supported driver CLIs is installed. Declining that prompt, or having +no installed driver CLI, keeps the deterministic auto answerer. Use +`--driver <backend>` to select a driver explicitly; use `--brake on|off` to +control whether risky driver answers block for approval. diff --git a/src/ouroboros/auto/__init__.py b/src/ouroboros/auto/__init__.py index 0d79e912b..8808c829f 100644 --- a/src/ouroboros/auto/__init__.py +++ b/src/ouroboros/auto/__init__.py @@ -6,7 +6,7 @@ before starting execution. 
""" -from ouroboros.auto.answerer import AutoAnswer, AutoAnswerer, AutoAnswerSource +from ouroboros.auto.answerer import AutoAnswer, AutoAnswerer, AutoAnswerMetadata, AutoAnswerSource from ouroboros.auto.grading import GradeGate, GradeResult, SeedGrade from ouroboros.auto.interview_driver import AutoInterviewDriver, AutoInterviewResult, InterviewTurn from ouroboros.auto.ledger import LedgerEntry, LedgerSection, SeedDraftLedger @@ -17,6 +17,7 @@ __all__ = [ "AutoAnswer", + "AutoAnswerMetadata", "AutoAnswerSource", "AutoAnswerer", "AutoInterviewDriver", diff --git a/src/ouroboros/auto/answerer.py b/src/ouroboros/auto/answerer.py index b9e8ff684..449a743e5 100644 --- a/src/ouroboros/auto/answerer.py +++ b/src/ouroboros/auto/answerer.py @@ -18,6 +18,7 @@ class AutoAnswerSource(StrEnum): EXISTING_CONVENTION = "existing_convention" CONSERVATIVE_DEFAULT = "conservative_default" ASSUMPTION = "assumption" + DRIVER = "driver" NON_GOAL = "non_goal" BLOCKER = "blocker" @@ -64,6 +65,15 @@ class AutoBlocker: question: str +@dataclass(frozen=True, slots=True) +class AutoAnswerMetadata: + """Structured provenance for auto answers that need audit context.""" + + risk: str | None = None + confidence: float | None = None + provenance: tuple[str, ...] 
= () + + @dataclass(frozen=True, slots=True) class AutoAnswer: """Answer plus structured ledger updates.""" @@ -75,6 +85,7 @@ class AutoAnswer: assumptions: list[str] = field(default_factory=list) non_goals: list[str] = field(default_factory=list) blocker: AutoBlocker | None = None + metadata: AutoAnswerMetadata = field(default_factory=AutoAnswerMetadata) @property def prefixed_text(self) -> str: diff --git a/src/ouroboros/auto/driver_answerer.py b/src/ouroboros/auto/driver_answerer.py new file mode 100644 index 000000000..153861643 --- /dev/null +++ b/src/ouroboros/auto/driver_answerer.py @@ -0,0 +1,344 @@ +"""Selected-driver interview answering for ``ooo auto``.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +import re +from typing import Protocol + +from ouroboros.auto.answerer import ( + AutoAnswer, + AutoAnswerContext, + AutoAnswerer, + AutoAnswerMetadata, + AutoAnswerSource, + AutoBlocker, +) +from ouroboros.auto.ledger import LedgerEntry, LedgerSource, LedgerStatus, SeedDraftLedger +from ouroboros.auto.state import AutoBrakeMode +from ouroboros.providers.base import CompletionConfig, LLMAdapter, Message, MessageRole +from ouroboros.providers.factory import create_llm_adapter, resolve_llm_backend + + +class AsyncAutoAnswerer(Protocol): + """Protocol for answerers that can draft interview answers asynchronously.""" + + async def answer( + self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext | None = None + ) -> AutoAnswer: + """Draft an answer for one interview question.""" + + def apply(self, answer: AutoAnswer, ledger: SeedDraftLedger, *, question: str) -> None: + """Apply ledger updates associated with an answer.""" + + +@dataclass(slots=True) +class DriverAutoAnswerer: + """Ask the selected ``llm.backend`` driver to answer every interview question. 
+ + The existing deterministic ``AutoAnswerer`` is still used as a ledger/risk + scaffold, but the text sent back to the interview backend comes from the + selected driver. With brake=on, high-impact/risky drafts become approval + blockers. With brake=off, they are sent automatically with assumption and + provenance tags so the later Seed-ready/A-grade gates remain the safety net. + """ + + backend: str | None = None + brake: AutoBrakeMode = AutoBrakeMode.ON + cwd: str | Path | None = None + adapter: LLMAdapter | None = None + baseline: AutoAnswerer = field(default_factory=AutoAnswerer) + timeout_seconds: float | None = 60.0 + + def __post_init__(self) -> None: + self.backend = resolve_llm_backend(self.backend) + + async def answer( + self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext | None = None + ) -> AutoAnswer: + """Return the selected driver's answer for ``question``.""" + scaffold = self.baseline.answer(question, ledger, context) + risk = classify_interview_answer_risk(question, scaffold) + if risk and self.brake == AutoBrakeMode.ON: + reason = f"brake on: risky auto interview answer requires approval ({risk})" + return AutoAnswer( + text=f"Cannot send automatically without approval: {risk}", + source=AutoAnswerSource.BLOCKER, + confidence=1.0, + blocker=AutoBlocker(reason=reason, question=question), + metadata=_answer_metadata( + backend=self.backend or "driver", + brake=self.brake, + risk=risk, + confidence=1.0, + scaffold=scaffold, + ), + ) + + if self.adapter is None: + allowed_tools: list[str] | None = None if self.backend == "hermes" else [] + self.adapter = create_llm_adapter( + backend=self.backend, + use_case="interview", + cwd=self.cwd, + allowed_tools=allowed_tools, + max_turns=1, + timeout=self.timeout_seconds, + ) + assert self.adapter is not None + prompt = _driver_prompt( + question, ledger, scaffold, backend=self.backend or "driver", risk=risk + ) + result = await self.adapter.complete( + 
messages=[Message(role=MessageRole.USER, content=prompt)], + config=CompletionConfig( + model="default", + temperature=0.2, + max_tokens=700, + role="auto_interview_answer", + max_turns=1, + ), + ) + if not result.is_ok: + return AutoAnswer( + text=f"Cannot obtain driver answer: {result.error}", + source=AutoAnswerSource.BLOCKER, + confidence=1.0, + blocker=AutoBlocker( + reason=f"selected driver {self.backend} failed to answer: {result.error}", + question=question, + ), + metadata=_answer_metadata( + backend=self.backend or "driver", + brake=self.brake, + risk="driver answer unavailable", + confidence=1.0, + scaffold=scaffold, + ), + ) + text = _clean_driver_text(result.value.content) + if not text: + return AutoAnswer( + text="Cannot obtain driver answer: empty response", + source=AutoAnswerSource.BLOCKER, + confidence=1.0, + blocker=AutoBlocker( + reason=f"selected driver {self.backend} returned an empty answer", + question=question, + ), + metadata=_answer_metadata( + backend=self.backend or "driver", + brake=self.brake, + risk="empty driver answer", + confidence=1.0, + scaffold=scaffold, + ), + ) + + assumptions = list(scaffold.assumptions) + confidence = min(scaffold.confidence, 0.82) + if risk: + assumptions.append(f"brake off auto-sent risky driver answer: {risk}") + confidence = min(confidence, 0.62) + tagged_text = _tag_driver_text( + text, backend=self.backend or "driver", brake=self.brake, risk=risk + ) + return AutoAnswer( + text=tagged_text, + source=AutoAnswerSource.DRIVER, + confidence=confidence, + ledger_updates=_ledger_updates_for( + scaffold, + driver_text=tagged_text, + risk=risk, + backend=self.backend or "driver", + ), + assumptions=assumptions, + non_goals=list(scaffold.non_goals), + metadata=_answer_metadata( + backend=self.backend or "driver", + brake=self.brake, + risk=risk, + confidence=confidence, + scaffold=scaffold, + ), + ) + + def apply(self, answer: AutoAnswer, ledger: SeedDraftLedger, *, question: str) -> None: + """Apply a 
selected-driver answer to the ledger."""
+        self.baseline.apply(answer, ledger, question=question)
+
+
+def classify_interview_answer_risk(question: str, scaffold: AutoAnswer | None = None) -> str | None:
+    """Return a risk label when an interview answer should be approval-gated.
+
+    Checks run in this order and the first hit wins:
+
+    1. A blocker already present on ``scaffold`` - its reason is the label.
+    2. Keyword patterns matched against the lower-cased ``question``.
+    3. A ``scaffold`` confidence below 0.65.
+
+    Args:
+        question: Interview question text about to be answered.
+        scaffold: Optional deterministic draft answer for the same question.
+
+    Returns:
+        A short human-readable risk label, or ``None`` when nothing matched.
+    """
+    if scaffold is not None and scaffold.blocker is not None:
+        return scaffold.blocker.reason
+    lowered = question.lower()
+    # Nouns accept an optional plural "s" (and "api key" accepts "_"/"-"/no
+    # separator) so everyday phrasings such as "credentials", "API keys" or
+    # "api_key" are gated exactly like their singular forms; the scope
+    # pattern below already follows the same convention.
+    patterns: tuple[tuple[str, str], ...] = (
+        (
+            r"\b(legal|privacy|pii|gdpr|hipaa|compliance|security|credentials?|secrets?|tokens?|api[ _-]?keys?|passwords?)\b",
+            "legal/privacy/security/compliance",
+        ),
+        (
+            r"\b(delete|destroy|drop|wipe|remove|irreversible|production|prod|deploy|billing|charges?|payments?|financial)\b",
+            "destructive or financial/production choice",
+        ),
+        (
+            (
+                r"\b(?:expand|new acceptance|scope|trade[- ]?off|pricing|business|product decision)\b"
+                r"|\badd(?:ing)?\s+(?:(?:a|an|the)\s+)?"
+                r"(?:features?|capabilit(?:y|ies)|support|requirements?"
+                r"|epic|story(?:\s+points?)?|product\s+area|scope)\b"
+            ),
+            "scope or product/business tradeoff",
+        ),
+        (
+            r"\b(prefer|preferences?|always|never)\b.*\b(users?|customers?|stakeholders?)\b",
+            "unknown user preference",
+        ),
+    )
+    for pattern, label in patterns:
+        if re.search(pattern, lowered):
+            return label
+    if scaffold is not None and scaffold.confidence < 0.65:
+        return "low-confidence high-impact answer"
+    return None
+
+
+def _driver_prompt(
+    question: str,
+    ledger: SeedDraftLedger,
+    scaffold: AutoAnswer,
+    *,
+    backend: str,
+    risk: str | None,
+) -> str:
+    open_gaps = ", ".join(ledger.open_gaps()) or "none"
+    risk_line = f"Risk label: {risk}." if risk else "Risk label: none."
+    return f"""You are the selected ooo auto interview driver: {backend}.
+Answer the Ouroboros Socratic interview question on behalf of the user.
+
+Rules:
+- Answer directly and concisely in 1-4 sentences.
+- Preserve the user's goal and avoid inventing user preferences.
+- If you make an assumption, state it explicitly.
+- Do not ask a follow-up question; this auto mode must answer every interview question.
+- Existing auto pipeline, Seed-ready checks, and A-grade review continue after your answer.
+
+Current goal: {_ledger_goal(ledger)}
+Open ledger gaps: {open_gaps}
+Deterministic scaffold answer: {scaffold.text}
+{risk_line}
+
+Interview question:
+{question}
+""".strip()
+
+
+def _ledger_goal(ledger: SeedDraftLedger) -> str:
+    """Return the most recent non-blank ``goal`` entry value, or ``""`` if none."""
+    entries = ledger.sections.get("goal").entries if "goal" in ledger.sections else []
+    for entry in reversed(entries):
+        if entry.value.strip():
+            return entry.value.strip()
+    return ""
+
+
+def _clean_driver_text(text: str) -> str:
+    """Normalize a raw driver reply into plain answer text.
+
+    Surrounding whitespace is always trimmed. When the whole reply is wrapped
+    in a Markdown code fence, the backtick delimiters are removed and, for a
+    multi-line fence, the opening fence line is dropped when it holds at most
+    a single token (an info string such as ``text`` or ``markdown``), so the
+    tag does not leak into the interview answer.
+    """
+    text = text.strip()
+    if text.startswith("```") and text.endswith("```"):
+        inner = text.strip("`")
+        info_line, newline, body = inner.partition("\n")
+        # Only discard the fence line when real content follows it and the
+        # line looks like an info string; a reply such as "```answer```"
+        # keeps its single line as the answer.
+        if newline and body.strip() and len(info_line.split()) <= 1:
+            inner = body
+        text = inner.strip()
+    return text
+
+
+def _tag_driver_text(text: str, *, backend: str, brake: AutoBrakeMode, risk: str | None) -> str:
+    """Prefix ``text`` with a bracketed driver/brake (and optional risk) tag."""
+    tags = [f"driver={backend}", f"brake={brake.value}"]
+    if risk:
+        tags.append(f"risk={risk}")
+    return f"[{' ; '.join(tags)}] {text}"
+
+
+def _answer_metadata(
+    *,
+    backend: str,
+    brake: AutoBrakeMode,
+    risk: str | None,
+    confidence: float,
+    scaffold: AutoAnswer,
+) -> AutoAnswerMetadata:
+    """Build structured selected-driver provenance for downstream audit surfaces."""
+    return AutoAnswerMetadata(
+        risk=risk,
+        confidence=max(0.0, min(1.0, float(confidence))),
+        provenance=(
+            f"driver:{backend}",
+            f"brake:{brake.value}",
+            f"scaffold_source:{scaffold.source.value}",
+        ),
+    )
+
+
+def _ledger_updates_for(
+    scaffold: AutoAnswer, *, driver_text: str, risk: str | None, backend: str
+) -> list[tuple[str, LedgerEntry]]:
+    """Build ledger updates that reflect the driver's actual freeform answer.
+
+    The deterministic scaffold provides the structural keys (so downstream
+    seed-generation stays section-aware), but each entry's value is replaced
+    with the driver's freeform answer so the persisted ledger and the
+    interview transcript carry the same content.
The entry is marked + ``INFERRED`` with reduced confidence and an ``auto_interview_transcript`` + evidence marker, so grading/A-grade gates can downgrade or re-verify the + answer without the interview loop treating the section as an open gap + (``CONFLICTING`` would block ``is_seed_ready`` and prevent convergence). + The original scaffold value is preserved verbatim in the rationale and + evidence as audit context. + """ + updates = [ + ( + section, + LedgerEntry( + key=entry.key, + value=f"driver:{backend} answer (verbatim): {driver_text}", + source=entry.source, + confidence=min(entry.confidence, 0.4), + status=LedgerStatus.INFERRED, + reversible=entry.reversible, + rationale=( + "Selected-driver freeform answer is the canonical source; the " + "scaffold's structural key is kept so Seed generation stays " + "section-aware. The entry is marked INFERRED with low confidence " + "and an auto_interview_transcript evidence marker so grading and " + "the A-grade gate can flag it for verification while the interview " + "loop still converges. 
Original scaffold value (kept only as " + f"evidence): {entry.value}" + ), + evidence=[ + *entry.evidence, + f"driver:{backend}", + "auto_interview_transcript", + f"scaffold_value:{entry.value}", + ], + ), + ) + for section, entry in scaffold.ledger_updates + ] + if risk: + updates.append( + ( + "constraints", + LedgerEntry( + key=f"risk.auto_driver.{_slug_key(risk)}", + value=f"Driver {backend} auto-sent a risky interview answer under brake=off: {risk}", + source=LedgerSource.ASSUMPTION, + confidence=0.6, + status=LedgerStatus.INFERRED, + rationale="Risk was preserved as provenance for Seed-ready and A-grade review gates.", + ), + ) + ) + return updates + + +def _slug_key(value: str) -> str: + return re.sub(r"[^a-z0-9]+", "_", value.lower()).strip("_") or "risk" diff --git a/src/ouroboros/auto/interview_driver.py b/src/ouroboros/auto/interview_driver.py index 4a8d87416..86a27cf6b 100644 --- a/src/ouroboros/auto/interview_driver.py +++ b/src/ouroboros/auto/interview_driver.py @@ -5,6 +5,7 @@ import asyncio from collections.abc import Awaitable, Callable from dataclasses import dataclass, field +import inspect import re from typing import Protocol @@ -127,7 +128,7 @@ async def run(self, state: AutoPipelineState, ledger: SeedDraftLedger) -> AutoIn state.mark_progress(f"interview round {round_number}/{self.max_rounds}") self._save(state) - answer = self._answer_with_gap_steering(turn.question, ledger, answer_context) + answer = await self._answer_with_gap_steering(turn.question, ledger, answer_context) if answer.blocker is not None: self.answerer.apply(answer, ledger, question=turn.question) state.ledger = ledger.to_dict() @@ -193,10 +194,10 @@ async def run(self, state: AutoPipelineState, ledger: SeedDraftLedger) -> AutoIn "blocked", state.interview_session_id, ledger, self.max_rounds, blocker ) - def _answer_with_gap_steering( + async def _answer_with_gap_steering( self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext ) -> AutoAnswer: - answer = 
self.answerer.answer(question, ledger, context) + answer = await self._answer(question, ledger, context) if answer.blocker is not None: return answer gaps = self.gap_detector.detect(ledger) @@ -219,7 +220,15 @@ def _answer_with_gap_steering( confidence=1.0, blocker=blocker, ) - return self.answerer.answer(_gap_prompt(next_gap), ledger, context) + return await self._answer(_gap_prompt(next_gap), ledger, context) + + async def _answer( + self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext + ) -> AutoAnswer: + answer = self.answerer.answer(question, ledger, context) + if inspect.isawaitable(answer): + answer = await answer + return answer def _handle_completed_turn( self, state: AutoPipelineState, ledger: SeedDraftLedger, turn: InterviewTurn, rounds: int diff --git a/src/ouroboros/auto/pipeline.py b/src/ouroboros/auto/pipeline.py index dccdc237a..3a1b6c627 100644 --- a/src/ouroboros/auto/pipeline.py +++ b/src/ouroboros/auto/pipeline.py @@ -42,6 +42,8 @@ class AutoPipelineResult: last_grade: str | None = None run_handoff_status: str | None = None run_handoff_guidance: str | None = None + max_interview_rounds: int | None = None + max_repair_rounds: int | None = None assumptions: tuple[str, ...] = () non_goals: tuple[str, ...] 
= () blocker: str | None = None @@ -402,6 +404,8 @@ def _result( last_grade=state.last_grade, run_handoff_status=state.run_handoff_status, run_handoff_guidance=state.run_handoff_guidance, + max_interview_rounds=state.max_interview_rounds, + max_repair_rounds=state.max_repair_rounds, assumptions=tuple(ledger.assumptions()), non_goals=tuple(ledger.non_goals()), blocker=blocker or state.last_error, diff --git a/src/ouroboros/auto/state.py b/src/ouroboros/auto/state.py index 9f669832d..122cf1bae 100644 --- a/src/ouroboros/auto/state.py +++ b/src/ouroboros/auto/state.py @@ -32,6 +32,13 @@ class AutoPolicy(StrEnum): BALANCED = "balanced" +class AutoBrakeMode(StrEnum): + """Safety-brake modes for driver-selected interview answering.""" + + ON = "on" + OFF = "off" + + TERMINAL_PHASES = {AutoPhase.COMPLETE, AutoPhase.BLOCKED, AutoPhase.FAILED} _ALLOWED_TRANSITIONS: dict[AutoPhase, set[AutoPhase]] = { AutoPhase.CREATED: {AutoPhase.INTERVIEW, AutoPhase.BLOCKED, AutoPhase.FAILED}, @@ -88,6 +95,8 @@ class AutoPipelineState: required_grade: str = "A" runtime_backend: str | None = None opencode_mode: str | None = None + interview_driver_backend: str | None = None + brake: AutoBrakeMode = AutoBrakeMode.ON skip_run: bool = False max_interview_rounds: int = 12 max_repair_rounds: int = 5 @@ -181,6 +190,7 @@ def to_dict(self) -> dict[str, Any]: data = asdict(self) data["phase"] = self.phase.value data["policy"] = self.policy.value + data["brake"] = self.brake.value return data @classmethod @@ -194,6 +204,8 @@ def from_dict(cls, data: dict[str, Any]) -> AutoPipelineState: payload.setdefault("max_repair_rounds", 5) payload.setdefault("run_handoff_status", None) payload.setdefault("run_handoff_guidance", None) + payload.setdefault("interview_driver_backend", None) + payload.setdefault("brake", AutoBrakeMode.ON.value) required_fields = {item.name for item in fields(cls)} missing_fields = sorted(required_fields - payload.keys()) if missing_fields: @@ -201,6 +213,7 @@ def from_dict(cls, 
data: dict[str, Any]) -> AutoPipelineState: raise ValueError(msg) payload["phase"] = AutoPhase(payload["phase"]) payload["policy"] = AutoPolicy(payload["policy"]) + payload["brake"] = AutoBrakeMode(payload["brake"]) state = cls(**payload) state._validate_loaded() return state @@ -288,6 +301,7 @@ def _validate_loaded(self) -> None: optional_string_fields = ( "runtime_backend", "opencode_mode", + "interview_driver_backend", "interview_session_id", "seed_id", "seed_path", diff --git a/src/ouroboros/cli/commands/auto.py b/src/ouroboros/cli/commands/auto.py index 87bfa33b8..5111bea1b 100644 --- a/src/ouroboros/cli/commands/auto.py +++ b/src/ouroboros/cli/commands/auto.py @@ -6,6 +6,8 @@ from enum import Enum import os from pathlib import Path +import shutil +import sys from typing import Annotated import typer @@ -17,16 +19,34 @@ load_seed, save_seed, ) +from ouroboros.auto.answerer import AutoAnswerer +from ouroboros.auto.driver_answerer import DriverAutoAnswerer from ouroboros.auto.interview_driver import AutoInterviewDriver from ouroboros.auto.pipeline import AutoPipeline, AutoPipelineResult from ouroboros.auto.seed_repairer import SeedRepairer -from ouroboros.auto.state import AutoPipelineState, AutoStore +from ouroboros.auto.state import AutoBrakeMode, AutoPipelineState, AutoStore +from ouroboros.backends import ( + get_backend_capability, + interview_driver_backend_choices, + resolve_interview_driver_backend, +) from ouroboros.cli.formatters import console from ouroboros.cli.formatters.panels import print_error, print_info, print_success -from ouroboros.config import get_opencode_mode +from ouroboros.config import ( + get_auto_interview_driver_backend, + get_cli_path, + get_codex_cli_path, + get_copilot_cli_path, + get_gemini_cli_path, + get_hermes_cli_path, + get_kiro_cli_path, + get_opencode_cli_path, + get_opencode_mode, +) from ouroboros.mcp.tools.authoring_handlers import GenerateSeedHandler, InterviewHandler from ouroboros.mcp.tools.execution_handlers import 
ExecuteSeedHandler, StartExecuteSeedHandler from ouroboros.orchestrator import resolve_agent_runtime_backend +from ouroboros.providers.factory import resolve_llm_backend class AgentRuntimeBackend(str, Enum): # noqa: UP042 @@ -41,6 +61,15 @@ class AgentRuntimeBackend(str, Enum): # noqa: UP042 KIRO = "kiro" +class AutoBrakeOption(str, Enum): # noqa: UP042 + """Safety brake options for selected-driver interview answering.""" + + ON = "on" + OFF = "off" + + +_DRIVER_CHOICES_HELP = ", ".join(interview_driver_backend_choices()) + app = typer.Typer( name="auto", help="Run bounded full-quality ooo auto pipeline.", no_args_is_help=False ) @@ -56,6 +85,21 @@ def auto_command( AgentRuntimeBackend | None, typer.Option("--runtime", help="Execution runtime backend.", case_sensitive=False), ] = None, + driver: Annotated[ + str | None, + typer.Option( + "--driver", + help=f"Interview answer driver backend ({_DRIVER_CHOICES_HELP}).", + ), + ] = None, + brake: Annotated[ + AutoBrakeOption | None, + typer.Option( + "--brake", + help="Safety brake: on gates risky driver answers; off sends all answers automatically.", + case_sensitive=False, + ), + ] = None, max_interview_rounds: Annotated[ int | None, typer.Option( @@ -109,12 +153,15 @@ def auto_command( if not resume and (goal is None or not goal.strip()): print_error("goal is required unless --resume is provided") raise typer.Exit(1) + driver = _prompt_driver_if_missing(driver=driver, resume=resume) try: result = asyncio.run( _run_auto( goal=goal, resume=resume, runtime=runtime.value if runtime else None, + driver=driver, + brake=brake.value if brake else None, max_interview_rounds=max_interview_rounds, max_repair_rounds=max_repair_rounds, skip_run=skip_run, @@ -143,17 +190,86 @@ def _safe_default_cwd() -> Path: _DEFAULT_MAX_INTERVIEW_ROUNDS = 12 _DEFAULT_MAX_REPAIR_ROUNDS = 5 +_DRIVER_CLI_PATH_GETTERS = { + "claude": get_cli_path, + "codex": get_codex_cli_path, + "copilot": get_copilot_cli_path, + "gemini": get_gemini_cli_path, + 
"hermes": get_hermes_cli_path, + "kiro": get_kiro_cli_path, + "opencode": get_opencode_cli_path, +} + + +def _prompt_driver_if_missing(*, driver: str | None, resume: str | None) -> str | None: + """Prompt interactive new sessions to opt into a selected interview driver.""" + if driver is not None or resume: + return driver + if get_auto_interview_driver_backend() is not None: + return driver + if not sys.stdin.isatty(): + return driver + choices = _installed_interview_driver_backends() + if not choices: + print_info( + "No installed interview driver CLI detected; using deterministic auto answers." + ) + return driver + if not typer.confirm("Use an interview driver to answer auto questions?", default=True): + return driver + + default = "hermes" if "hermes" in choices else choices[0] + prompt = f"Interview driver ({', '.join(choices)})" + while True: + selected = typer.prompt(prompt, default=default).strip() + try: + resolve_interview_driver_backend(selected) + except ValueError: + print_error(f"Unsupported interview driver backend: {selected}") + continue + return selected + + +def _installed_interview_driver_backends() -> tuple[str, ...]: + """Return interview drivers with a configured or PATH-discoverable CLI.""" + installed: list[str] = [] + for backend in interview_driver_backend_choices(): + capability = get_backend_capability(backend) + if capability is None or capability.cli_name is None: + continue + getter = _DRIVER_CLI_PATH_GETTERS.get(capability.name) + configured_path = getter() if getter is not None else None + if _is_executable_command(configured_path) or shutil.which(capability.cli_name): + installed.append(backend) + return tuple(installed) + + +def _is_executable_command(value: str | None) -> bool: + """Return True when a configured command path/name is executable.""" + if value is None or not value.strip(): + return False + candidate = Path(value).expanduser() + if candidate.parent == Path("."): + return shutil.which(str(candidate)) is not None + 
return candidate.is_file() and os.access(candidate, os.X_OK) + async def _run_auto( *, goal: str | None, resume: str | None, runtime: str | None, - max_interview_rounds: int | None, - max_repair_rounds: int | None, - skip_run: bool, + driver: str | None = None, + brake: str | None = None, + max_interview_rounds: int | None = None, + max_repair_rounds: int | None = None, + skip_run: bool = False, ) -> AutoPipelineResult: store = AutoStore() + configured_driver = ( + None if driver is not None or resume else get_auto_interview_driver_backend() + ) + requested_driver = _resolve_driver_backend(driver or configured_driver) if resume: state = store.load(resume) persisted_runtime = state.runtime_backend @@ -193,6 +309,25 @@ async def _run_auto( else: state.max_repair_rounds = max_repair_rounds skip_run = skip_run or state.skip_run + persisted_driver = _normalize_persisted_driver_backend(state.interview_driver_backend) + if requested_driver is not None and persisted_driver not in { + None, + requested_driver, + }: + msg = ( + f"resume driver mismatch: session uses {state.interview_driver_backend}, " + f"but --driver {requested_driver} was requested" + ) + raise ValueError(msg) + driver = requested_driver or persisted_driver + brake_mode = AutoBrakeMode(brake or state.brake.value) + if brake is not None and brake_mode != state.brake: + msg = ( + f"resume brake mismatch: session uses {state.brake.value}, " + f"but --brake {brake_mode.value} was requested" + ) + raise ValueError(msg) + state.interview_driver_backend = driver else: if goal is None or not goal.strip(): raise ValueError("goal is required when not resuming") @@ -203,6 +338,8 @@ async def _run_auto( max_repair_rounds = _DEFAULT_MAX_REPAIR_ROUNDS state = AutoPipelineState(goal=goal.strip(), cwd=str(_safe_default_cwd())) state.runtime_backend = runtime + state.interview_driver_backend = requested_driver + state.brake = AutoBrakeMode(brake or AutoBrakeMode.ON.value) state.skip_run = skip_run 
state.max_interview_rounds = max_interview_rounds state.max_repair_rounds = max_repair_rounds @@ -228,8 +365,18 @@ async def _run_auto( start_execute = StartExecuteSeedHandler( execute_handler=execute_seed, agent_runtime_backend=runtime, opencode_mode=opencode_mode ) + selected_answerer = AutoAnswerer() + if state.interview_driver_backend is not None: + selected_answerer = DriverAutoAnswerer( + backend=state.interview_driver_backend, + brake=state.brake, + cwd=state.cwd, + timeout_seconds=60.0, + ) + state.interview_driver_backend = selected_answerer.backend driver = AutoInterviewDriver( HandlerInterviewBackend(interview, cwd=state.cwd), + answerer=selected_answerer, store=store, max_rounds=max_interview_rounds, ) @@ -247,6 +394,19 @@ async def _run_auto( return result +def _resolve_driver_backend(value: str | None) -> str | None: + """Resolve a driver backend to the LLM adapter name persisted in auto state.""" + if value is None or not value.strip(): + return None + resolve_interview_driver_backend(value) + return resolve_llm_backend(value) + + +def _normalize_persisted_driver_backend(value: str | None) -> str | None: + """Normalize older persisted driver identities for resume comparison.""" + return _resolve_driver_backend(value) + + def _print_status(state: AutoPipelineState) -> None: """Print a compact read-only summary for a persisted auto session.""" print_info("Auto session status") @@ -256,6 +416,9 @@ def _print_status(state: AutoPipelineState) -> None: console.print(f"Last progress at: {state.last_progress_at}") if state.interview_session_id: console.print(f"Interview session: {state.interview_session_id}") + if state.interview_driver_backend: + console.print(f"Interview driver: {state.interview_driver_backend}") + console.print(f"Brake: {state.brake.value}") console.print(f"Current interview round: {state.current_round}") if state.pending_question: question = state.pending_question.replace("\n", " ").strip() diff --git 
a/src/ouroboros/cli/commands/config.py b/src/ouroboros/cli/commands/config.py index 31b2b19fe..5221cb197 100644 --- a/src/ouroboros/cli/commands/config.py +++ b/src/ouroboros/cli/commands/config.py @@ -14,17 +14,26 @@ from ouroboros.backends import ( get_backend_capability, + interview_driver_backend_choices, + resolve_interview_driver_backend, runtime_backend_choices, ) from ouroboros.cli.formatters import console from ouroboros.cli.formatters.panels import print_error, print_info, print_success, print_warning from ouroboros.cli.formatters.tables import create_key_value_table, print_table +from ouroboros.providers.factory import resolve_llm_backend app = typer.Typer( name="config", help="Manage Ouroboros configuration.", no_args_is_help=True, ) +driver_app = typer.Typer( + name="driver", + help="Manage the default interview answer driver backend.", + no_args_is_help=False, +) +app.add_typer(driver_app, name="driver") _VALID_BACKENDS = runtime_backend_choices() _SWITCHABLE_BACKENDS = tuple( @@ -32,6 +41,7 @@ for backend in _VALID_BACKENDS if (capability := get_backend_capability(backend)) is not None and capability.switchable_runtime ) +_DRIVER_BACKENDS = interview_driver_backend_choices() def _load_config() -> tuple[dict, Path]: @@ -61,6 +71,7 @@ def _load_config() -> tuple[dict, Path]: # Guard against sections that should be dicts but aren't (e.g. 
orchestrator: []) _MAPPING_SECTIONS = ( "orchestrator", + "auto", "llm", "logging", "persistence", @@ -276,6 +287,63 @@ def _tracking_print_error(msg: str) -> None: console.print(f"[dim]CLI: {cli_path}[/dim]\n") +@driver_app.callback(invoke_without_command=True) +def driver_show() -> None: + """Show the configured default interview driver backend.""" + data, _config_path = _load_config() + current = data.get("auto", {}).get("interview_driver_backend") or "deterministic" + console.print(f"\n[bold]Current driver backend:[/bold] [cyan]{current}[/cyan]") + console.print(f"[dim]Set with: ouroboros config driver set <{'|'.join(_DRIVER_BACKENDS)}>\n[/dim]") + + +@driver_app.command("set") +def driver_set( + backend: Annotated[ + str, + typer.Argument(help=f"Interview driver backend ({', '.join(_DRIVER_BACKENDS)})."), + ], +) -> None: + """Set the default backend used by selected-driver auto interviews.""" + data, config_path = _load_config() + try: + resolved = resolve_interview_driver_backend(backend) + except ValueError: + print_error( + f"Unsupported interview driver backend: {backend}\n" + f"Supported driver backends: {', '.join(_DRIVER_BACKENDS)}" + ) + raise typer.Exit(1) from None + + auto_config = data.setdefault("auto", {}) + if not isinstance(auto_config, dict): + print_error("Invalid config section 'auto' (expected mapping)") + raise typer.Exit(1) + + old_value = auto_config.get("interview_driver_backend") + auto_config["interview_driver_backend"] = resolve_llm_backend(resolved) + _save_config(data, config_path) + + try: + from ouroboros.config.loader import load_config + + load_config() + except Exception as exc: + if old_value is not None: + auto_config["interview_driver_backend"] = old_value + else: + del auto_config["interview_driver_backend"] + _save_config(data, config_path) + print_error(f"Invalid driver backend - rolled back.\n{exc}") + raise typer.Exit(1) from None + + if old_value == auto_config["interview_driver_backend"]: + print_info(f"Already 
using driver backend {auto_config['interview_driver_backend']}.") + elif old_value is not None: + print_success(f"Driver backend: {old_value} -> {auto_config['interview_driver_backend']}") + else: + print_success(f"Driver backend: {auto_config['interview_driver_backend']}") + + @app.command() def init() -> None: """Initialize Ouroboros configuration. diff --git a/src/ouroboros/config/__init__.py b/src/ouroboros/config/__init__.py index 1e5f237ff..f007b0746 100644 --- a/src/ouroboros/config/__init__.py +++ b/src/ouroboros/config/__init__.py @@ -32,6 +32,7 @@ get_agent_runtime_backend, get_assertion_extraction_model, get_atomicity_model, + get_auto_interview_driver_backend, get_clarification_model, get_cli_path, get_codex_cli_path, @@ -66,6 +67,7 @@ load_credentials, ) from ouroboros.config.models import ( + AutoConfig, ClarificationConfig, ConsensusConfig, CredentialsConfig, @@ -97,6 +99,7 @@ "CredentialsConfig", "TierConfig", "ModelConfig", + "AutoConfig", "ProviderCredentials", "EconomicsConfig", "LLMConfig", @@ -125,6 +128,7 @@ "get_agent_permission_mode", "get_assertion_extraction_model", "get_atomicity_model", + "get_auto_interview_driver_backend", "get_llm_backend", "get_max_parallel_workers", "get_llm_permission_mode", diff --git a/src/ouroboros/config/loader.py b/src/ouroboros/config/loader.py index 9c8d62bfd..f389c9b9c 100644 --- a/src/ouroboros/config/loader.py +++ b/src/ouroboros/config/loader.py @@ -1101,6 +1101,19 @@ def get_llm_permission_mode(backend: str | None = None) -> str: return "acceptEdits" if _uses_opencode_backend(backend) else "default" +def get_auto_interview_driver_backend() -> str | None: + """Get the configured default backend for selected-driver auto interviews.""" + env_backend = os.environ.get("OUROBOROS_AUTO_DRIVER_BACKEND", "").strip().lower() + if env_backend: + return env_backend + + try: + config = load_config() + return config.auto.interview_driver_backend + except ConfigError: + return None + + def 
_resolve_llm_backend_for_models(backend: str | None = None) -> str: """Resolve the effective backend name for backend-aware model defaults.""" return (backend or get_llm_backend()).strip().lower() diff --git a/src/ouroboros/config/models.py b/src/ouroboros/config/models.py index b650596ce..57a817756 100644 --- a/src/ouroboros/config/models.py +++ b/src/ouroboros/config/models.py @@ -26,6 +26,7 @@ from pydantic import BaseModel, Field, field_validator +from ouroboros.backends import resolve_interview_driver_backend from ouroboros.orchestrator_stage import VALID_STAGE_KEYS @@ -239,6 +240,24 @@ class EvaluationConfig(BaseModel, frozen=True): assertion_extraction_model: str = "claude-sonnet-4-6" +class AutoConfig(BaseModel, frozen=True): + """Configuration for ``ooo auto`` defaults.""" + + interview_driver_backend: str | None = None + + @field_validator("interview_driver_backend") + @classmethod + def validate_interview_driver_backend(cls, value: str | None) -> str | None: + """Validate configured auto interview driver backend names and aliases.""" + if value is None: + return None + normalized = value.strip().lower() + if not normalized: + return None + resolve_interview_driver_backend(normalized) + return normalized + + class ConsensusConfig(BaseModel, frozen=True): """Phase 5 (Consensus) configuration. 
@@ -552,6 +571,7 @@ class OuroborosConfig(BaseModel, frozen=True): resilience: Phase 3 configuration evaluation: Phase 4 configuration consensus: Phase 5 configuration + auto: Defaults for ooo auto flows llm_profiles: Named provider-neutral profiles for LLM-only tasks llm_role_profiles: Mapping from logical task roles to profile names persistence: Storage configuration @@ -566,6 +586,7 @@ class OuroborosConfig(BaseModel, frozen=True): execution: ExecutionConfig = Field(default_factory=ExecutionConfig) resilience: ResilienceConfig = Field(default_factory=ResilienceConfig) evaluation: EvaluationConfig = Field(default_factory=EvaluationConfig) + auto: AutoConfig = Field(default_factory=AutoConfig) consensus: ConsensusConfig = Field(default_factory=ConsensusConfig) llm_profiles: dict[str, LLMTaskProfileConfig] = Field(default_factory=dict) llm_role_profiles: dict[str, str] = Field(default_factory=dict) diff --git a/src/ouroboros/mcp/tools/auto_handler.py b/src/ouroboros/mcp/tools/auto_handler.py index 9a2773c4c..8ef6d34a0 100644 --- a/src/ouroboros/mcp/tools/auto_handler.py +++ b/src/ouroboros/mcp/tools/auto_handler.py @@ -15,10 +15,13 @@ load_seed, save_seed, ) +from ouroboros.auto.answerer import AutoAnswerer +from ouroboros.auto.driver_answerer import DriverAutoAnswerer from ouroboros.auto.interview_driver import AutoInterviewDriver from ouroboros.auto.pipeline import AutoPipeline, AutoPipelineResult from ouroboros.auto.seed_repairer import SeedRepairer -from ouroboros.auto.state import AutoPipelineState, AutoStore +from ouroboros.auto.state import AutoBrakeMode, AutoPipelineState, AutoStore +from ouroboros.backends import resolve_interview_driver_backend from ouroboros.config import get_opencode_mode from ouroboros.core.types import Result from ouroboros.mcp.errors import MCPServerError, MCPToolError @@ -33,6 +36,7 @@ ToolInputType, ) from ouroboros.orchestrator import resolve_agent_runtime_backend +from ouroboros.providers.factory import resolve_llm_backend 
@dataclass(slots=True) @@ -68,16 +72,14 @@ def definition(self) -> MCPToolDefinition: MCPToolParameter( "max_interview_rounds", ToolInputType.INTEGER, - "Max interview rounds", + "Max interview rounds; omit on resume to keep the persisted bound", required=False, - default=12, ), MCPToolParameter( "max_repair_rounds", ToolInputType.INTEGER, - "Max repair rounds", + "Max repair rounds; omit on resume to keep the persisted bound", required=False, - default=5, ), MCPToolParameter( "skip_run", @@ -86,6 +88,18 @@ def definition(self) -> MCPToolDefinition: required=False, default=False, ), + MCPToolParameter( + "driver", + ToolInputType.STRING, + "Interview answer driver from llm.backend candidates", + required=False, + ), + MCPToolParameter( + "brake", + ToolInputType.STRING, + "Safety brake mode: on gates risky answers, off sends all answers", + required=False, + ), ), ) @@ -113,6 +127,20 @@ async def _run(self, arguments: dict[str, Any]) -> AutoPipelineResult: store = self.store or AutoStore() resume = arguments.get("resume") requested_skip_run = bool(arguments.get("skip_run", False)) + requested_driver = arguments.get("driver") + if requested_driver is not None and not isinstance(requested_driver, str): + raise ValueError("driver must be a string") + requested_driver = requested_driver.strip() if isinstance(requested_driver, str) else None + if requested_driver: + resolve_interview_driver_backend(requested_driver) + requested_driver = resolve_llm_backend(requested_driver) + else: + requested_driver = None + requested_brake_mode: AutoBrakeMode | None = None + if "brake" in arguments: + requested_brake = arguments.get("brake") + if requested_brake not in {None, ""}: + requested_brake_mode = AutoBrakeMode(str(requested_brake).strip().lower()) if isinstance(resume, str) and resume: state = store.load(resume) cwd = state.cwd @@ -123,9 +151,38 @@ async def _run(self, arguments: dict[str, Any]) -> AutoPipelineResult: opencode_mode = _resolved_opencode_mode( runtime_backend, 
state.opencode_mode or self.opencode_mode ) - max_interview_rounds = state.max_interview_rounds - max_repair_rounds = state.max_repair_rounds + requested_max_interview_rounds = _optional_positive_int_arg( + arguments, "max_interview_rounds" + ) + requested_max_repair_rounds = _optional_positive_int_arg(arguments, "max_repair_rounds") + max_interview_rounds = _effective_resume_bound( + "max_interview_rounds", + requested_max_interview_rounds, + state.max_interview_rounds, + ) + max_repair_rounds = _effective_resume_bound( + "max_repair_rounds", + requested_max_repair_rounds, + state.max_repair_rounds, + ) + state.max_interview_rounds = max_interview_rounds + state.max_repair_rounds = max_repair_rounds skip_run = requested_skip_run or state.skip_run + persisted_driver = _normalize_persisted_driver_backend(state.interview_driver_backend) + if requested_driver is not None and persisted_driver not in { + None, + requested_driver, + }: + raise ValueError( + f"resume driver mismatch: session uses {state.interview_driver_backend}, " + f"but driver {requested_driver} was requested" + ) + state.interview_driver_backend = requested_driver or persisted_driver + if requested_brake_mode is not None and requested_brake_mode != state.brake: + raise ValueError( + f"resume brake mismatch: session uses {state.brake.value}, " + f"but brake {requested_brake_mode.value} was requested" + ) else: goal = arguments.get("goal") if not isinstance(goal, str) or not goal.strip(): @@ -139,6 +196,8 @@ async def _run(self, arguments: dict[str, Any]) -> AutoPipelineResult: state = AutoPipelineState(goal=goal.strip(), cwd=cwd) state.max_interview_rounds = max_interview_rounds state.max_repair_rounds = max_repair_rounds + state.interview_driver_backend = requested_driver + state.brake = requested_brake_mode or AutoBrakeMode.ON state.runtime_backend = runtime_backend state.opencode_mode = opencode_mode state.skip_run = skip_run @@ -165,8 +224,18 @@ async def _run(self, arguments: dict[str, Any]) -> 
AutoPipelineResult: mcp_tool_prefix=self.mcp_tool_prefix, ) + selected_answerer = AutoAnswerer() + if state.interview_driver_backend is not None: + selected_answerer = DriverAutoAnswerer( + backend=state.interview_driver_backend, + brake=state.brake, + cwd=cwd, + timeout_seconds=60.0, + ) + state.interview_driver_backend = selected_answerer.backend driver = AutoInterviewDriver( HandlerInterviewBackend(interview_handler, cwd=cwd), + answerer=selected_answerer, store=store, max_rounds=max_interview_rounds, ) @@ -208,6 +277,10 @@ def _result_meta(result: AutoPipelineResult) -> dict[str, Any]: meta["run_handoff_status"] = result.run_handoff_status if result.run_handoff_guidance: meta["run_handoff_guidance"] = result.run_handoff_guidance + if result.max_interview_rounds is not None: + meta["max_interview_rounds"] = result.max_interview_rounds + if result.max_repair_rounds is not None: + meta["max_repair_rounds"] = result.max_repair_rounds return meta @@ -230,6 +303,33 @@ def _positive_int_arg(arguments: dict[str, Any], name: str, default: int) -> int return value +def _optional_positive_int_arg(arguments: dict[str, Any], name: str) -> int | None: + if name not in arguments: + return None + value = arguments.get(name) + if value in {None, ""}: + return None + if isinstance(value, bool) or not isinstance(value, int): + msg = f"{name} must be a positive integer" + raise ValueError(msg) + if value <= 0: + msg = f"{name} must be >= 1" + raise ValueError(msg) + return value + + +def _effective_resume_bound(name: str, requested: int | None, persisted: int) -> int: + if requested is None: + return persisted + if requested < persisted: + msg = ( + f"{name} {requested} is lower than the persisted bound ({persisted}); " + "refuse to tighten a bound on resume" + ) + raise ValueError(msg) + return requested + + def _safe_default_cwd() -> Path: cwd = Path.cwd() if cwd == Path("/"): @@ -237,6 +337,14 @@ def _safe_default_cwd() -> Path: return _require_writable_cwd(cwd) +def 
_normalize_persisted_driver_backend(value: str | None) -> str | None: + """Normalize older persisted driver identities for resume comparison.""" + if value is None or not value.strip(): + return None + resolve_interview_driver_backend(value) + return resolve_llm_backend(value) + + def _resolve_cwd(value: object) -> Path: if value is None or value == "": return _safe_default_cwd() diff --git a/tests/unit/auto/test_driver_answerer.py b/tests/unit/auto/test_driver_answerer.py new file mode 100644 index 000000000..b810bc6ae --- /dev/null +++ b/tests/unit/auto/test_driver_answerer.py @@ -0,0 +1,296 @@ +from __future__ import annotations + +import pytest + +from ouroboros.auto.answerer import AutoAnswerSource +from ouroboros.auto.driver_answerer import DriverAutoAnswerer, classify_interview_answer_risk +from ouroboros.auto.ledger import SeedDraftLedger +from ouroboros.auto.state import AutoBrakeMode +from ouroboros.core.types import Result +from ouroboros.providers.base import CompletionResponse, UsageInfo + + +class FakeAdapter: + def __init__(self, content: str = "Use the existing project conventions.") -> None: + self.content = content + self.prompts: list[str] = [] + + async def complete(self, messages, config): # noqa: ANN001 + self.prompts.append(messages[-1].content) + return Result.ok( + CompletionResponse( + content=self.content, + model="fake", + usage=UsageInfo(prompt_tokens=1, completion_tokens=1, total_tokens=2), + ) + ) + + +def test_classifies_blocker_questions_as_risky() -> None: + ledger = SeedDraftLedger.from_goal("Deploy a service") + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.OFF, adapter=FakeAdapter()) + scaffold = answerer.baseline.answer("Which production credentials should we use?", ledger) + + assert classify_interview_answer_risk("Which production credentials should we use?", scaffold) + + +@pytest.mark.parametrize( + "question", + [ + "How should users add a task?", + "What should the add command do on duplicate input?", 
+ "Should the form let admins add a row?", + "How do we add an item to the cart?", + ], +) +def test_routine_crud_add_questions_are_not_scope_risky(question: str) -> None: + assert classify_interview_answer_risk(question, scaffold=None) is None + + +@pytest.mark.parametrize( + "question", + [ + "Should we add a feature for offline mode?", + "Do we add capability for keyboard shortcuts?", + "Is it worth adding an epic for an undo workflow?", + "Should we add support for legacy clients?", + "Should we add features for power users?", + ], +) +def test_scope_add_questions_are_still_risky(question: str) -> None: + assert ( + classify_interview_answer_risk(question, scaffold=None) + == "scope or product/business tradeoff" + ) + + +@pytest.mark.asyncio +async def test_driver_answerer_brake_off_answers_risky_question() -> None: + ledger = SeedDraftLedger.from_goal("Deploy a service") + adapter = FakeAdapter( + "Assumption: use a placeholder secret reference, never a real credential." + ) + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.OFF, adapter=adapter) + + answer = await answerer.answer("Which production credentials should we use?", ledger) + + assert answer.source == AutoAnswerSource.DRIVER + assert answer.blocker is None + assert "driver=codex" in answer.text + assert "brake=off" in answer.text + assert "risk=" in answer.text + assert answer.metadata.risk == "destructive or financial/production choice" + assert answer.metadata.confidence == answer.confidence + assert answer.metadata.provenance == ( + "driver:codex", + "brake:off", + "scaffold_source:conservative_default", + ) + assert adapter.prompts + + +@pytest.mark.asyncio +async def test_driver_answerer_ledger_values_reflect_driver_answer_without_blocking_loop() -> None: + """Driver mode must not let the persisted ledger and the interview + transcript diverge: the ledger entry value must contain the driver's + freeform answer verbatim. 
At the same time the entry status must NOT + block the interview loop's Seed-ready check (CONFLICTING/MISSING/ + WEAK/BLOCKED count as open gaps), so we mark the entry INFERRED with + low confidence and an ``auto_interview_transcript`` evidence marker for + grading/A-grade gates to consume. The original scaffold value is + preserved as audit evidence so divergence is never silently lost. + """ + from ouroboros.auto.ledger import LedgerStatus + + ledger = SeedDraftLedger.from_goal("Build a CLI") + question = "Which runtime and framework should be used?" + driver_text = "Use Typer and verify with pytest." + adapter = FakeAdapter(driver_text) + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.OFF, adapter=adapter) + scaffold = answerer.baseline.answer(question, ledger) + + answer = await answerer.answer(question, ledger) + + assert answer.ledger_updates + structural_updates = [ + (section, entry) + for section, entry in answer.ledger_updates + if not entry.key.startswith("risk.auto_driver") + ] + # The structural keys (section + key + source) match the scaffold so + # downstream Seed generation stays section-aware. + assert [(section, entry.key, entry.source) for section, entry in structural_updates] == [ + (section, entry.key, entry.source) for section, entry in scaffold.ledger_updates + ] + blocking_statuses = { + LedgerStatus.MISSING, + LedgerStatus.WEAK, + LedgerStatus.CONFLICTING, + LedgerStatus.BLOCKED, + } + for _section, entry in structural_updates: + # Persisted ledger value carries the driver answer verbatim → no + # divergence between ledger and interview transcript. + assert driver_text in entry.value + # Status must not block ``is_seed_ready`` (otherwise the interview + # loop would never converge for a selected-driver session). + assert entry.status == LedgerStatus.INFERRED + assert entry.status not in blocking_statuses + # Provenance signals for downstream grading / A-grade verification. 
+ assert entry.confidence <= 0.4 + assert "driver:codex" in entry.evidence + assert "auto_interview_transcript" in entry.evidence + # Original scaffold values are preserved as audit evidence (rationale + # and a ``scaffold_value:...`` evidence tag) so divergence between + # transcript and ledger is never silently lost. + scaffold_values = {entry.value for _section, entry in scaffold.ledger_updates if entry.value} + if scaffold_values: + rationale_text = " ".join(entry.rationale or "" for _section, entry in structural_updates) + evidence_tags = {tag for _section, entry in structural_updates for tag in entry.evidence} + assert any(value in rationale_text for value in scaffold_values) + assert any(f"scaffold_value:{value}" in evidence_tags for value in scaffold_values) + + +@pytest.mark.asyncio +async def test_driver_answerer_ledger_does_not_block_seed_ready_convergence() -> None: + """Regression: applying the driver answerer's ledger updates must keep + the interview loop's ``is_seed_ready`` reachable. Marking entries + CONFLICTING (an earlier attempt at this fix) treats the section as an + open gap, so a selected-driver session would never converge to seed + generation. INFERRED is the correct status for "answered with a + driver-derived value that grading should later verify". + """ + ledger = SeedDraftLedger.from_goal("Build a CLI") + driver_text = "Use Typer + pytest, target Python 3.12, document via README." + adapter = FakeAdapter(driver_text) + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.OFF, adapter=adapter) + + for question in ( + "Which runtime and framework should be used?", + "What user actions must work?", + "How will success be measured?", + "What should be out of scope?", + ): + answer = await answerer.answer(question, ledger) + answerer.apply(answer, ledger, question=question) + + # The driver-only answers must not leave the ledger in a state the + # interview loop treats as blocked. 
Any remaining gaps must be due to + # genuinely missing required sections, not the driver mode itself. + blocking_statuses = { + # Mirror SeedDraftLedger.open_gaps's blocking set. + "missing", + "weak", + "conflicting", + "blocked", + } + section_statuses = ledger.section_statuses() + driver_blocked = [ + name for name, status in section_statuses.items() if status.value in blocking_statuses + ] + # All sections the driver populated must have advanced past blocking + # statuses; this is the contract that broke under the CONFLICTING + # variant of the fix. + assert "scope" not in driver_blocked + assert "non_goals" not in driver_blocked + assert "constraints" not in driver_blocked + + +@pytest.mark.asyncio +async def test_driver_answerer_preserves_scaffold_ledger_source_categories() -> None: + from ouroboros.auto.ledger import LedgerSource + + ledger = SeedDraftLedger.from_goal("Build a local CLI") + adapter = FakeAdapter("Keep the MVP local-only.") + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.OFF, adapter=adapter) + + answer = await answerer.answer("What should be out of scope?", ledger) + + non_goals = [entry for section, entry in answer.ledger_updates if section == "non_goals"] + assert non_goals + assert non_goals[0].source == LedgerSource.NON_GOAL + + +@pytest.mark.asyncio +async def test_driver_answerer_constructs_adapter_with_session_cwd(monkeypatch, tmp_path) -> None: + from ouroboros.auto import driver_answerer as module + + captured: dict[str, object] = {} + adapter = FakeAdapter("Use the checked-out project conventions.") + + def fake_create_llm_adapter(**kwargs): # noqa: ANN003, ANN202 + captured.update(kwargs) + return adapter + + monkeypatch.setattr(module, "create_llm_adapter", fake_create_llm_adapter) + ledger = SeedDraftLedger.from_goal("Build a CLI") + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.OFF, cwd=tmp_path) + + answer = await answerer.answer("Which runtime and framework should be used?", ledger) 
+ + assert answer.source == AutoAnswerSource.DRIVER + assert captured["cwd"] == tmp_path + assert captured["allowed_tools"] == [] + + +@pytest.mark.asyncio +async def test_hermes_driver_does_not_request_unsupported_tool_envelope( + monkeypatch, tmp_path +) -> None: + from ouroboros.auto import driver_answerer as module + + captured: dict[str, object] = {} + adapter = FakeAdapter("Use the checked-out project conventions.") + + def fake_create_llm_adapter(**kwargs): # noqa: ANN003, ANN202 + captured.update(kwargs) + return adapter + + monkeypatch.setattr(module, "create_llm_adapter", fake_create_llm_adapter) + ledger = SeedDraftLedger.from_goal("Build a CLI") + answerer = DriverAutoAnswerer(backend="hermes", brake=AutoBrakeMode.OFF, cwd=tmp_path) + + answer = await answerer.answer("Which runtime and framework should be used?", ledger) + + assert answer.source == AutoAnswerSource.DRIVER + assert captured["allowed_tools"] is None + + +@pytest.mark.asyncio +async def test_driver_answerer_risky_brake_off_records_active_risk() -> None: + from ouroboros.auto.ledger import LedgerSource, LedgerStatus + + ledger = SeedDraftLedger.from_goal("Deploy a service") + adapter = FakeAdapter("Use a placeholder secret reference, never a real credential.") + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.OFF, adapter=adapter) + + answer = await answerer.answer("Which production credentials should we use?", ledger) + + risks = [ + entry + for _section, entry in answer.ledger_updates + if entry.key.startswith("risk.auto_driver") + ] + assert risks + assert risks[0].source == LedgerSource.ASSUMPTION + assert risks[0].status == LedgerStatus.INFERRED + + +@pytest.mark.asyncio +async def test_driver_answerer_brake_on_gates_risky_question() -> None: + ledger = SeedDraftLedger.from_goal("Deploy a service") + adapter = FakeAdapter("This should not be called") + answerer = DriverAutoAnswerer(backend="codex", brake=AutoBrakeMode.ON, adapter=adapter) + + answer = await 
answerer.answer("Which production credentials should we use?", ledger) + + assert answer.blocker is not None + assert "requires approval" in answer.blocker.reason + assert answer.metadata.risk == "destructive or financial/production choice" + assert answer.metadata.confidence == 1.0 + assert answer.metadata.provenance == ( + "driver:codex", + "brake:on", + "scaffold_source:conservative_default", + ) + assert adapter.prompts == [] diff --git a/tests/unit/auto/test_surface.py b/tests/unit/auto/test_surface.py index 85a70c989..39ab81735 100644 --- a/tests/unit/auto/test_surface.py +++ b/tests/unit/auto/test_surface.py @@ -106,8 +106,11 @@ def test_auto_skill_frontmatter_dispatches_to_mcp_tool() -> None: assert 'goal: "$goal"' in content assert 'resume: "$resume"' in content assert 'skip_run: "$skip_run"' in content + assert 'driver: "$driver"' in content + assert 'brake: "$brake"' in content assert 'max_interview_rounds: "$max_interview_rounds"' in content assert "ooo auto --resume" in content + assert "--driver hermes --brake on" in content assert "--show-ledger" in content @@ -116,7 +119,21 @@ def test_auto_handler_schema_contains_hang_safe_options() -> None: assert definition.name == "ouroboros_auto" names = {param.name for param in definition.parameters} - assert {"goal", "resume", "max_interview_rounds", "max_repair_rounds", "skip_run"} <= names + assert { + "goal", + "resume", + "max_interview_rounds", + "max_repair_rounds", + "skip_run", + "driver", + "brake", + } <= names + params = {param.name: param for param in definition.parameters} + assert params["max_interview_rounds"].default is None + assert params["max_repair_rounds"].default is None + assert "persisted bound" in params["max_interview_rounds"].description + assert "persisted bound" in params["max_repair_rounds"].description + assert params["brake"].default is None class _FakeInterviewHandler: @@ -1196,6 +1213,293 @@ def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, ARG002 assert 
captured["repair_rounds"] == 3 +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("arguments", "expected_interview_rounds", "expected_repair_rounds"), + [ + ({}, 4, 6), + ({"max_interview_rounds": 4, "max_repair_rounds": 6}, 4, 6), + ({"max_interview_rounds": 7, "max_repair_rounds": 8}, 7, 8), + ({"max_interview_rounds": 7}, 7, 6), + ({"max_repair_rounds": 8}, 4, 8), + ], +) +async def test_auto_handler_resume_resolves_effective_loop_bounds( + monkeypatch, + tmp_path, + arguments, + expected_interview_rounds, + expected_repair_rounds, +) -> None: + from ouroboros.auto.pipeline import AutoPipelineResult + from ouroboros.auto.state import AutoPipelineState, AutoStore + from ouroboros.mcp.tools import auto_handler as auto_module + + store = AutoStore(tmp_path) + state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path / "project")) + state.runtime_backend = "codex" + state.max_interview_rounds = 4 + state.max_repair_rounds = 6 + store.save(state) + captured: dict[str, object] = {} + + class FakePipeline: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003 + captured["driver_rounds"] = args[0].max_rounds + captured["repair_rounds"] = kwargs["repairer"].max_repair_rounds + + async def run(self, run_state): # noqa: ANN001 + return AutoPipelineResult( + status="complete", + auto_session_id=run_state.auto_session_id, + phase="complete", + max_interview_rounds=run_state.max_interview_rounds, + max_repair_rounds=run_state.max_repair_rounds, + ) + + class FakeHandler: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, ARG002 + pass + + monkeypatch.setattr(auto_module, "AutoPipeline", FakePipeline) + monkeypatch.setattr(auto_module, "InterviewHandler", FakeHandler) + monkeypatch.setattr(auto_module, "GenerateSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "ExecuteSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "StartExecuteSeedHandler", FakeHandler) + + result = await AutoHandler(store=store).handle({"resume": 
state.auto_session_id, **arguments}) + + assert result.is_ok + assert captured["driver_rounds"] == expected_interview_rounds + assert captured["repair_rounds"] == expected_repair_rounds + assert result.value.meta["max_interview_rounds"] == expected_interview_rounds + assert result.value.meta["max_repair_rounds"] == expected_repair_rounds + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("field_name", "lower_value"), + [("max_interview_rounds", 3), ("max_repair_rounds", 5)], +) +async def test_auto_handler_resume_rejects_lower_loop_bounds_without_persisting( + tmp_path, field_name, lower_value +) -> None: + from ouroboros.auto.state import AutoPipelineState, AutoStore + + store = AutoStore(tmp_path) + state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path / "project")) + state.runtime_backend = "codex" + state.max_interview_rounds = 4 + state.max_repair_rounds = 6 + store.save(state) + + result = await AutoHandler(store=store).handle( + {"resume": state.auto_session_id, field_name: lower_value} + ) + + restored = store.load(state.auto_session_id) + assert result.is_err + assert field_name in str(result.error) + assert "lower than the persisted bound" in str(result.error) + assert restored.max_interview_rounds == 4 + assert restored.max_repair_rounds == 6 + + +@pytest.mark.asyncio +async def test_auto_handler_without_driver_keeps_deterministic_answerer( + monkeypatch, tmp_path +) -> None: + from ouroboros.auto.answerer import AutoAnswerer + from ouroboros.auto.pipeline import AutoPipelineResult + from ouroboros.mcp.tools import auto_handler as auto_module + + captured: dict[str, object] = {} + + class FakePipeline: + def __init__(self, driver, _seed_generator, **kwargs): # noqa: ANN001, ANN003 + captured["answerer_type"] = type(driver.answerer) + + async def run(self, run_state): # noqa: ANN001 + captured["state_driver"] = run_state.interview_driver_backend + captured["state_brake"] = run_state.brake.value + return AutoPipelineResult( + 
status="complete", + auto_session_id=run_state.auto_session_id, + phase="complete", + ) + + class FakeHandler: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, ARG002 + pass + + monkeypatch.setattr(auto_module, "AutoPipeline", FakePipeline) + monkeypatch.setattr(auto_module, "InterviewHandler", FakeHandler) + monkeypatch.setattr(auto_module, "GenerateSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "ExecuteSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "StartExecuteSeedHandler", FakeHandler) + + result = await AutoHandler(llm_backend="codex_cli").handle( + {"goal": "Build a CLI", "cwd": str(tmp_path)} + ) + + assert result.is_ok + assert captured == { + "answerer_type": AutoAnswerer, + "state_driver": None, + "state_brake": "on", + } + + +@pytest.mark.asyncio +async def test_auto_handler_resume_preserves_driver_and_brake_when_unspecified( + monkeypatch, tmp_path +) -> None: + from ouroboros.auto.pipeline import AutoPipelineResult + from ouroboros.auto.state import AutoBrakeMode, AutoPipelineState, AutoStore + from ouroboros.mcp.tools import auto_handler as auto_module + + store = AutoStore(tmp_path) + state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path / "project")) + state.runtime_backend = "codex" + state.interview_driver_backend = "opencode" + state.brake = AutoBrakeMode.OFF + store.save(state) + captured: dict[str, object] = {} + + class FakePipeline: + def __init__(self, driver, _seed_generator, **kwargs): # noqa: ANN001, ANN003 + captured["answerer_backend"] = driver.answerer.backend + captured["answerer_brake"] = driver.answerer.brake.value + + async def run(self, run_state): # noqa: ANN001 + captured["state_driver"] = run_state.interview_driver_backend + captured["state_brake"] = run_state.brake.value + return AutoPipelineResult( + status="complete", + auto_session_id=run_state.auto_session_id, + phase="complete", + ) + + class FakeHandler: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, 
ARG002 + pass + + monkeypatch.setattr(auto_module, "AutoPipeline", FakePipeline) + monkeypatch.setattr(auto_module, "InterviewHandler", FakeHandler) + monkeypatch.setattr(auto_module, "GenerateSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "ExecuteSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "StartExecuteSeedHandler", FakeHandler) + + result = await AutoHandler(store=store).handle({"resume": state.auto_session_id}) + + assert result.is_ok + assert captured == { + "answerer_backend": "opencode", + "answerer_brake": "off", + "state_driver": "opencode", + "state_brake": "off", + } + + +@pytest.mark.asyncio +async def test_auto_handler_resume_rejects_brake_mismatch(tmp_path) -> None: + from ouroboros.auto.state import AutoBrakeMode, AutoPipelineState, AutoStore + + store = AutoStore(tmp_path) + state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path / "project")) + state.runtime_backend = "codex" + state.interview_driver_backend = "codex" + state.brake = AutoBrakeMode.OFF + store.save(state) + + result = await AutoHandler(store=store).handle({"resume": state.auto_session_id, "brake": "on"}) + + assert result.is_err + assert "resume brake mismatch" in str(result.error) + + +@pytest.mark.asyncio +async def test_auto_handler_resume_allows_normalized_driver_alias(monkeypatch, tmp_path) -> None: + from ouroboros.auto.pipeline import AutoPipelineResult + from ouroboros.auto.state import AutoPipelineState, AutoStore + from ouroboros.mcp.tools import auto_handler as auto_module + + store = AutoStore(tmp_path) + state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path / "project")) + state.runtime_backend = "codex" + state.interview_driver_backend = "codex" + store.save(state) + + class FakePipeline: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, ARG002 + pass + + async def run(self, run_state): # noqa: ANN001 + return AutoPipelineResult( + status="complete", + auto_session_id=run_state.auto_session_id, + 
phase="complete", + ) + + class FakeHandler: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, ARG002 + pass + + monkeypatch.setattr(auto_module, "AutoPipeline", FakePipeline) + monkeypatch.setattr(auto_module, "InterviewHandler", FakeHandler) + monkeypatch.setattr(auto_module, "GenerateSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "ExecuteSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "StartExecuteSeedHandler", FakeHandler) + + result = await AutoHandler(store=store).handle( + {"resume": state.auto_session_id, "driver": "codex_cli"} + ) + + assert result.is_ok + + +@pytest.mark.asyncio +async def test_auto_handler_resume_accepts_claude_alias_for_legacy_claude( + monkeypatch, tmp_path +) -> None: + from ouroboros.auto.pipeline import AutoPipelineResult + from ouroboros.auto.state import AutoPipelineState, AutoStore + from ouroboros.mcp.tools import auto_handler as auto_module + + store = AutoStore(tmp_path) + state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path / "project")) + state.runtime_backend = "codex" + state.interview_driver_backend = "claude" + store.save(state) + + class FakePipeline: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, ARG002 + pass + + async def run(self, run_state): # noqa: ANN001 + return AutoPipelineResult( + status="complete", + auto_session_id=run_state.auto_session_id, + phase="complete", + ) + + class FakeHandler: + def __init__(self, *args, **kwargs): # noqa: ANN002, ANN003, ARG002 + pass + + monkeypatch.setattr(auto_module, "AutoPipeline", FakePipeline) + monkeypatch.setattr(auto_module, "InterviewHandler", FakeHandler) + monkeypatch.setattr(auto_module, "GenerateSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "ExecuteSeedHandler", FakeHandler) + monkeypatch.setattr(auto_module, "StartExecuteSeedHandler", FakeHandler) + + result = await AutoHandler(store=store).handle( + {"resume": state.auto_session_id, "driver": "claude"} + ) + + assert result.is_ok 
+ + def test_auto_state_persists_loop_bounds() -> None: from ouroboros.auto.state import AutoPipelineState diff --git a/tests/unit/cli/test_auto_command.py b/tests/unit/cli/test_auto_command.py index 2a635360a..ed8405bf4 100644 --- a/tests/unit/cli/test_auto_command.py +++ b/tests/unit/cli/test_auto_command.py @@ -51,12 +51,117 @@ def consume(coro): assert "auto_test" in result.output +def test_auto_prompts_for_driver_when_interactive_and_unconfigured() -> None: + """Interactive plain `ooo auto` should offer selected-driver mode.""" + from ouroboros.cli.commands.auto import _prompt_driver_if_missing + + with ( + patch("ouroboros.cli.commands.auto.get_auto_interview_driver_backend", return_value=None), + patch("ouroboros.cli.commands.auto.sys.stdin.isatty", return_value=True), + patch( + "ouroboros.cli.commands.auto._installed_interview_driver_backends", + return_value=("hermes",), + ), + patch("ouroboros.cli.commands.auto.typer.confirm", return_value=True) as confirm, + patch("ouroboros.cli.commands.auto.typer.prompt", return_value="hermes") as prompt, + ): + driver = _prompt_driver_if_missing(driver=None, resume=None) + + assert driver == "hermes" + confirm.assert_called_once() + prompt.assert_called_once() + + +def test_auto_prompt_decline_keeps_deterministic_driver() -> None: + """Declining the interactive driver prompt preserves existing deterministic mode.""" + from ouroboros.cli.commands.auto import _prompt_driver_if_missing + + with ( + patch("ouroboros.cli.commands.auto.get_auto_interview_driver_backend", return_value=None), + patch("ouroboros.cli.commands.auto.sys.stdin.isatty", return_value=True), + patch( + "ouroboros.cli.commands.auto._installed_interview_driver_backends", + return_value=("hermes",), + ), + patch("ouroboros.cli.commands.auto.typer.confirm", return_value=False), + patch("ouroboros.cli.commands.auto.typer.prompt") as prompt, + ): + driver = _prompt_driver_if_missing(driver=None, resume=None) + + assert driver is None + 
prompt.assert_not_called() + + +def test_auto_prompt_skips_when_no_driver_cli_is_installed() -> None: + """Do not offer selected-driver mode when no local driver executable is available.""" + from ouroboros.cli.commands.auto import _prompt_driver_if_missing + + with ( + patch("ouroboros.cli.commands.auto.get_auto_interview_driver_backend", return_value=None), + patch("ouroboros.cli.commands.auto.sys.stdin.isatty", return_value=True), + patch( + "ouroboros.cli.commands.auto._installed_interview_driver_backends", + return_value=(), + ), + patch("ouroboros.cli.commands.auto.typer.confirm") as confirm, + ): + driver = _prompt_driver_if_missing(driver=None, resume=None) + + assert driver is None + confirm.assert_not_called() + + +def test_installed_driver_choices_exclude_missing_hermes_cli() -> None: + """Hermes should not become the default prompt choice unless its CLI is available.""" + from ouroboros.cli.commands.auto import _installed_interview_driver_backends + + path_getters = { + "claude": lambda: None, + "codex": lambda: None, + "copilot": lambda: None, + "gemini": lambda: None, + "hermes": lambda: None, + "kiro": lambda: None, + "opencode": lambda: None, + } + + def fake_which(command: str) -> str | None: + return "/usr/local/bin/codex" if command == "codex" else None + + with ( + patch("ouroboros.cli.commands.auto._DRIVER_CLI_PATH_GETTERS", path_getters), + patch("ouroboros.cli.commands.auto.shutil.which", side_effect=fake_which), + ): + choices = _installed_interview_driver_backends() + + assert choices == ("codex",) + + +def test_auto_prompt_skips_when_configured_driver_exists() -> None: + """Configured defaults should avoid an extra prompt.""" + from ouroboros.cli.commands.auto import _prompt_driver_if_missing + + with ( + patch( + "ouroboros.cli.commands.auto.get_auto_interview_driver_backend", + return_value="hermes", + ), + patch("ouroboros.cli.commands.auto.sys.stdin.isatty", return_value=True), + patch("ouroboros.cli.commands.auto.typer.confirm") as 
confirm, + ): + driver = _prompt_driver_if_missing(driver=None, resume=None) + + assert driver is None + confirm.assert_not_called() + + def _persisted_state_with_bounds(tmp_path, *, max_interview_rounds: int, max_repair_rounds: int): """Persist a blocked auto session with a known loop budget for resume tests.""" from ouroboros.auto.state import AutoPhase, AutoPipelineState, AutoStore state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path)) state.runtime_backend = "claude" + state.interview_driver_backend = "codex" state.max_interview_rounds = max_interview_rounds state.max_repair_rounds = max_repair_rounds state.skip_run = True @@ -184,3 +289,341 @@ def test_resume_rejects_lower_bound_override(tmp_path) -> None: skip_run=False, ) ) + + +def test_resume_preserves_persisted_driver_and_brake_when_unspecified(tmp_path) -> None: + """Resume without driver/brake flags keeps the original interview semantics.""" + import asyncio + + from ouroboros.auto.state import AutoBrakeMode + from ouroboros.cli.commands.auto import _run_auto + + state, store, session_id = _persisted_state_with_bounds( + tmp_path, max_interview_rounds=2, max_repair_rounds=1 + ) + state.brake = AutoBrakeMode.OFF + store.save(state) + captured: dict[str, str] = {} + + async def fake_pipeline_run(self, state): # noqa: ARG001 + captured["driver"] = state.interview_driver_backend + captured["brake"] = state.brake.value + captured["answerer_backend"] = self.interview_driver.answerer.backend + captured["answerer_brake"] = self.interview_driver.answerer.brake.value + return AutoPipelineResult( + status="complete", + auto_session_id=session_id, + phase="complete", + grade="A", + ) + + with ( + patch("ouroboros.cli.commands.auto.AutoStore") as store_cls, + patch("ouroboros.cli.commands.auto.AutoPipeline.run", new=fake_pipeline_run), + ): + store_cls.return_value = store + result = asyncio.run( + _run_auto( + goal=None, + resume=session_id, + runtime=None, + skip_run=False, + ) + ) + + assert 
result.status == "complete" + assert captured == { + "driver": "codex", + "brake": "off", + "answerer_backend": "codex", + "answerer_brake": "off", + } + + +def test_resume_applies_requested_driver_for_legacy_session(tmp_path) -> None: + """Legacy sessions without a persisted driver still honor an explicit resume driver.""" + import asyncio + + from ouroboros.cli.commands.auto import _run_auto + + state, store, session_id = _persisted_state_with_bounds( + tmp_path, max_interview_rounds=2, max_repair_rounds=1 + ) + state.interview_driver_backend = None + store.save(state) + captured: dict[str, str | None] = {} + + async def fake_pipeline_run(self, state): # noqa: ARG001 + captured["driver"] = state.interview_driver_backend + captured["answerer_backend"] = self.interview_driver.answerer.backend + return AutoPipelineResult( + status="complete", + auto_session_id=session_id, + phase="complete", + grade="A", + ) + + with ( + patch("ouroboros.cli.commands.auto.AutoStore") as store_cls, + patch("ouroboros.cli.commands.auto.AutoPipeline.run", new=fake_pipeline_run), + ): + store_cls.return_value = store + result = asyncio.run( + _run_auto( + goal=None, + resume=session_id, + runtime=None, + driver="hermes", + skip_run=False, + ) + ) + + assert result.status == "complete" + assert captured == {"driver": "hermes", "answerer_backend": "hermes"} + + +def test_resume_rejects_brake_mismatch(tmp_path) -> None: + """Changing brake mode on resume must be explicit session mismatch, not silent mutation.""" + import asyncio + + import pytest + + from ouroboros.auto.state import AutoBrakeMode + from ouroboros.cli.commands.auto import _run_auto + + state, store, session_id = _persisted_state_with_bounds( + tmp_path, max_interview_rounds=2, max_repair_rounds=1 + ) + state.brake = AutoBrakeMode.OFF + store.save(state) + + with patch("ouroboros.cli.commands.auto.AutoStore") as store_cls: + store_cls.return_value = store + with pytest.raises(ValueError, match="resume brake mismatch"): + 
asyncio.run( + _run_auto( + goal=None, + resume=session_id, + runtime=None, + brake=AutoBrakeMode.ON.value, + skip_run=False, + ) + ) + + +def test_auto_without_driver_keeps_deterministic_answerer(tmp_path) -> None: + """Plain `ooo auto` must not opt into an LLM-backed answer driver.""" + import asyncio + + from ouroboros.auto.answerer import AutoAnswerer + from ouroboros.auto.state import AutoStore + from ouroboros.cli.commands.auto import _run_auto + + store = AutoStore(tmp_path) + captured: dict[str, object] = {} + + async def fake_pipeline_run(self, state): # noqa: ARG001 + captured["driver"] = state.interview_driver_backend + captured["answerer_type"] = type(self.interview_driver.answerer) + return AutoPipelineResult( + status="complete", + auto_session_id=state.auto_session_id, + phase="complete", + grade="A", + ) + + with ( + patch("ouroboros.cli.commands.auto.AutoStore") as store_cls, + patch("ouroboros.cli.commands.auto.AutoPipeline.run", new=fake_pipeline_run), + patch("ouroboros.cli.commands.auto._safe_default_cwd", return_value=tmp_path), + ): + store_cls.return_value = store + result = asyncio.run( + _run_auto( + goal="Build a CLI", + resume=None, + runtime=None, + skip_run=True, + ) + ) + + assert result.status == "complete" + assert captured == {"driver": None, "answerer_type": AutoAnswerer} + + +def test_auto_uses_configured_driver_when_flag_is_omitted(tmp_path) -> None: + """A configured auto driver opts plain `ooo auto` into selected-driver mode.""" + import asyncio + + from ouroboros.auto.state import AutoStore + from ouroboros.cli.commands.auto import _run_auto + + store = AutoStore(tmp_path) + captured: dict[str, str] = {} + + async def fake_pipeline_run(self, state): # noqa: ARG001 + captured["driver"] = state.interview_driver_backend + captured["answerer_backend"] = self.interview_driver.answerer.backend + return AutoPipelineResult( + status="complete", + auto_session_id=state.auto_session_id, + phase="complete", + grade="A", + ) + + with ( + 
patch("ouroboros.cli.commands.auto.AutoStore") as store_cls, + patch("ouroboros.cli.commands.auto.AutoPipeline.run", new=fake_pipeline_run), + patch("ouroboros.cli.commands.auto._safe_default_cwd", return_value=tmp_path), + patch( + "ouroboros.cli.commands.auto.get_auto_interview_driver_backend", + return_value="hermes", + ), + ): + store_cls.return_value = store + result = asyncio.run( + _run_auto( + goal="Build a CLI", + resume=None, + runtime=None, + skip_run=True, + ) + ) + + assert result.status == "complete" + assert captured == {"driver": "hermes", "answerer_backend": "hermes"} + + +def test_resume_ignores_configured_driver_for_legacy_session(tmp_path) -> None: + """Resume must preserve a deterministic legacy session unless --driver is explicit.""" + import asyncio + + from ouroboros.auto.answerer import AutoAnswerer + from ouroboros.cli.commands.auto import _run_auto + + state, store, session_id = _persisted_state_with_bounds( + tmp_path, max_interview_rounds=2, max_repair_rounds=1 + ) + state.interview_driver_backend = None + store.save(state) + captured: dict[str, object] = {} + + async def fake_pipeline_run(self, state): # noqa: ARG001 + captured["driver"] = state.interview_driver_backend + captured["answerer_type"] = type(self.interview_driver.answerer) + return AutoPipelineResult( + status="complete", + auto_session_id=session_id, + phase="complete", + grade="A", + ) + + with ( + patch("ouroboros.cli.commands.auto.AutoStore") as store_cls, + patch("ouroboros.cli.commands.auto.AutoPipeline.run", new=fake_pipeline_run), + patch( + "ouroboros.cli.commands.auto.get_auto_interview_driver_backend", + return_value="hermes", + ), + ): + store_cls.return_value = store + result = asyncio.run( + _run_auto( + goal=None, + resume=session_id, + runtime=None, + skip_run=False, + ) + ) + + assert result.status == "complete" + assert captured == {"driver": None, "answerer_type": AutoAnswerer} + + +def 
test_resume_accepts_claude_driver_alias_for_legacy_persisted_claude(tmp_path) -> None: + """Resume compares selected drivers using the persisted LLM backend identity.""" + import asyncio + + from ouroboros.cli.commands.auto import _run_auto + + state, store, session_id = _persisted_state_with_bounds( + tmp_path, max_interview_rounds=2, max_repair_rounds=1 + ) + state.interview_driver_backend = "claude" + store.save(state) + captured: dict[str, str | None] = {} + + async def fake_pipeline_run(self, state): # noqa: ARG001 + captured["driver"] = state.interview_driver_backend + return AutoPipelineResult( + status="complete", + auto_session_id=session_id, + phase="complete", + grade="A", + ) + + with ( + patch("ouroboros.cli.commands.auto.AutoStore") as store_cls, + patch("ouroboros.cli.commands.auto.AutoPipeline.run", new=fake_pipeline_run), + ): + store_cls.return_value = store + result = asyncio.run( + _run_auto( + goal=None, + resume=session_id, + runtime=None, + driver="claude", + skip_run=False, + ) + ) + + assert result.status == "complete" + assert captured == {"driver": "claude_code"} + + +def test_auto_persists_selected_driver_and_brake_off(tmp_path) -> None: + """`ooo auto --driver X --brake off` stores the selected interview respondent.""" + import asyncio + + from ouroboros.auto.state import AutoBrakeMode, AutoStore + from ouroboros.cli.commands.auto import _run_auto + + store = AutoStore(tmp_path) + captured: dict[str, str] = {} + + async def fake_pipeline_run(self, state): # noqa: ARG001 + captured["driver"] = state.interview_driver_backend + captured["brake"] = state.brake.value + captured["answerer_backend"] = self.interview_driver.answerer.backend + captured["answerer_brake"] = self.interview_driver.answerer.brake.value + return AutoPipelineResult( + status="complete", + auto_session_id=state.auto_session_id, + phase="complete", + grade="A", + ) + + with ( + patch("ouroboros.cli.commands.auto.AutoStore") as store_cls, + 
patch("ouroboros.cli.commands.auto.AutoPipeline.run", new=fake_pipeline_run), + patch("ouroboros.cli.commands.auto._safe_default_cwd", return_value=tmp_path), + ): + store_cls.return_value = store + result = asyncio.run( + _run_auto( + goal="Build a CLI", + resume=None, + runtime=None, + driver="hermes", + brake=AutoBrakeMode.OFF.value, + skip_run=True, + ) + ) + + assert result.status == "complete" + assert captured == { + "driver": "hermes", + "brake": "off", + "answerer_backend": "hermes", + "answerer_brake": "off", + } diff --git a/tests/unit/cli/test_config.py b/tests/unit/cli/test_config.py index 9f0dd9c16..5e96edeba 100644 --- a/tests/unit/cli/test_config.py +++ b/tests/unit/cli/test_config.py @@ -223,6 +223,58 @@ def test_structurally_invalid_logging_section(self, tmp_path: Path) -> None: assert "Invalid config section" in result.output +# ── config driver ──────────────────────────────────────────────── + + +class TestConfigDriver: + """Tests for config driver command.""" + + def test_show_current_driver_backend(self, config_dir: Path) -> None: + with patch("ouroboros.config.models.get_config_dir", return_value=config_dir): + result = runner.invoke(app, ["driver"]) + assert result.exit_code == 0 + assert "Current driver backend" in result.output + assert "deterministic" in result.output + + def test_driver_set_hermes_updates_auto_driver_backend(self, config_dir: Path) -> None: + with ( + patch("ouroboros.config.models.get_config_dir", return_value=config_dir), + patch("ouroboros.config.loader.load_config"), + ): + result = runner.invoke(app, ["driver", "set", "hermes"]) + + assert result.exit_code == 0 + data = yaml.safe_load((config_dir / "config.yaml").read_text()) + assert data["auto"]["interview_driver_backend"] == "hermes" + assert data["llm"]["backend"] == "claude" + + def test_driver_set_claude_stores_llm_backend_identity(self, config_dir: Path) -> None: + with ( + patch("ouroboros.config.models.get_config_dir", return_value=config_dir), + 
patch("ouroboros.config.loader.load_config"), + ): + result = runner.invoke(app, ["driver", "set", "claude"]) + + assert result.exit_code == 0 + data = yaml.safe_load((config_dir / "config.yaml").read_text()) + assert data["auto"]["interview_driver_backend"] == "claude_code" + + def test_driver_set_rejects_runtime_only_unknown_backend(self, config_dir: Path) -> None: + with patch("ouroboros.config.models.get_config_dir", return_value=config_dir): + result = runner.invoke(app, ["driver", "set", "nonexistent"]) + assert result.exit_code == 1 + assert "Unsupported interview driver backend" in result.output + + def test_driver_show_rejects_structurally_invalid_auto_section(self, tmp_path: Path) -> None: + config = {"orchestrator": {"runtime_backend": "claude"}, "auto": []} + (tmp_path / "config.yaml").write_text(yaml.dump(config)) + + with patch("ouroboros.config.models.get_config_dir", return_value=tmp_path): + result = runner.invoke(app, ["driver"]) + assert result.exit_code == 1 + assert "Invalid config section 'auto'" in result.output + + # ── config validate ────────────────────────────────────────────── @@ -248,6 +300,18 @@ def test_invalid_backend_exits_nonzero(self, tmp_path: Path) -> None: result = runner.invoke(app, ["validate"]) assert result.exit_code == 1 + def test_invalid_auto_driver_backend_exits_nonzero(self, tmp_path: Path) -> None: + """validate should exit 1 when configured auto driver backend is unsupported.""" + config = { + "auto": {"interview_driver_backend": "bogus"}, + "llm": {"backend": "claude"}, + } + (tmp_path / "config.yaml").write_text(yaml.dump(config)) + + with patch("ouroboros.config.models.get_config_dir", return_value=tmp_path): + result = runner.invoke(app, ["validate"]) + assert result.exit_code == 1 + def test_opencode_backend_is_valid(self, tmp_path: Path) -> None: """validate should accept opencode as a valid runtime backend.""" config = {"orchestrator": {"runtime_backend": "opencode"}, "llm": {"backend": "opencode"}} diff 
--git a/tests/unit/config/test_models.py b/tests/unit/config/test_models.py index ce7bc46cb..ea42e8451 100644 --- a/tests/unit/config/test_models.py +++ b/tests/unit/config/test_models.py @@ -4,6 +4,7 @@ import pytest from ouroboros.config.models import ( + AutoConfig, ClarificationConfig, ConsensusConfig, CredentialsConfig, @@ -488,6 +489,7 @@ def test_ouroboros_config_defaults(self) -> None: """OuroborosConfig has all default sections.""" config = OuroborosConfig() assert config.economics is not None + assert config.auto == AutoConfig() assert config.llm is not None assert config.clarification is not None assert config.execution is not None @@ -501,6 +503,15 @@ def test_ouroboros_config_defaults(self) -> None: assert config.runtime_controls is not None assert config.logging is not None + def test_auto_config_validates_interview_driver_backend(self) -> None: + """AutoConfig accepts known driver aliases and rejects unknown defaults.""" + assert AutoConfig(interview_driver_backend="claude_code").interview_driver_backend == ( + "claude_code" + ) + + with pytest.raises(ValidationError): + AutoConfig(interview_driver_backend="bogus") + def test_ouroboros_config_accepts_llm_profiles(self) -> None: """OuroborosConfig stores task profiles and role mappings.""" config = OuroborosConfig( diff --git a/tests/unit/router/test_packaged_auto_dispatch.py b/tests/unit/router/test_packaged_auto_dispatch.py index e6128c03b..744573f48 100644 --- a/tests/unit/router/test_packaged_auto_dispatch.py +++ b/tests/unit/router/test_packaged_auto_dispatch.py @@ -1,7 +1,13 @@ from __future__ import annotations from pathlib import Path +from unittest.mock import AsyncMock, patch +import pytest + +from ouroboros.core.types import Result +from ouroboros.mcp.types import ContentType, MCPContentItem, MCPToolResult +from ouroboros.orchestrator.command_dispatcher import create_codex_command_dispatcher from ouroboros.router import Resolved, resolve_skill_dispatch @@ -21,5 +27,100 @@ def 
test_packaged_auto_skill_dispatches_to_ouroboros_auto(tmp_path: Path) -> Non "max_interview_rounds": "", "max_repair_rounds": "", "skip_run": True, + "driver": "", + "brake": "", } assert result.first_argument == "Audit the open PRs --skip-run" + + +def test_packaged_auto_skill_dispatches_driver_and_brake_to_ouroboros_auto( + tmp_path: Path, +) -> None: + """Lock selected-driver options on the packaged ``ooo auto`` skill surface.""" + prompt = 'ooo auto "Audit the open PRs" --driver hermes --brake off' + + result = resolve_skill_dispatch(prompt, cwd=tmp_path) + + assert isinstance(result, Resolved) + assert result.command_prefix == "ooo auto" + assert result.mcp_tool == "ouroboros_auto" + assert result.mcp_args == { + "goal": "Audit the open PRs", + "resume": "", + "cwd": str(tmp_path), + "max_interview_rounds": "", + "max_repair_rounds": "", + "skip_run": "", + "driver": "hermes", + "brake": "off", + } + assert result.first_argument == "Audit the open PRs --driver hermes --brake off" + + +def _fake_ouroboros_server() -> AsyncMock: + server = AsyncMock() + server.call_tool = AsyncMock( + return_value=Result.ok( + MCPToolResult( + content=(MCPContentItem(type=ContentType.TEXT, text="ok"),), + meta={"auto_session_id": "auto_test"}, + ) + ) + ) + return server + + +@pytest.mark.asyncio +async def test_packaged_auto_skill_dispatch_forwards_driver_and_brake_to_mcp_payload( + tmp_path: Path, +) -> None: + """Lock packaged ``ooo auto`` driver/brake forwarding into the actual MCP call.""" + prompt = 'ooo auto "Audit the open PRs" --driver hermes --brake off --skip-run' + result = resolve_skill_dispatch(prompt, cwd=tmp_path) + assert isinstance(result, Resolved) + + fake_server = _fake_ouroboros_server() + dispatch = create_codex_command_dispatcher(cwd=tmp_path) + + with patch( + "ouroboros.mcp.server.adapter.create_ouroboros_server", + return_value=fake_server, + ): + messages = await dispatch(result, None) + + fake_server.call_tool.assert_awaited_once() + tool_name, 
payload = fake_server.call_tool.await_args.args + assert tool_name == "ouroboros_auto" + assert payload["goal"] == "Audit the open PRs" + assert payload["driver"] == "hermes" + assert payload["brake"] == "off" + assert payload["skip_run"] is True + assert messages is not None + assert messages[0].data["tool_input"] == payload + + +@pytest.mark.asyncio +async def test_packaged_auto_skill_dispatch_does_not_leak_placeholder_strings_for_unset_driver_brake( + tmp_path: Path, +) -> None: + """Plain ``ooo auto`` must not leak ``$driver``/``$brake`` placeholder literals into MCP.""" + result = resolve_skill_dispatch('ouroboros:auto "Audit the open PRs"', cwd=tmp_path) + if not isinstance(result, Resolved): + result = resolve_skill_dispatch('ooo auto "Audit the open PRs"', cwd=tmp_path) + assert isinstance(result, Resolved) + + fake_server = _fake_ouroboros_server() + dispatch = create_codex_command_dispatcher(cwd=tmp_path) + + with patch( + "ouroboros.mcp.server.adapter.create_ouroboros_server", + return_value=fake_server, + ): + await dispatch(result, None) + + payload = fake_server.call_tool.await_args.args[1] + assert payload["goal"] == "Audit the open PRs" + assert payload.get("driver", "") != "$driver" + assert payload.get("brake", "") != "$brake" + assert payload.get("driver", "") in ("", None) + assert payload.get("brake", "") in ("", None)