Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions skills/auto/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ mcp_args:
max_interview_rounds: "$max_interview_rounds"
max_repair_rounds: "$max_repair_rounds"
skip_run: "$skip_run"
driver: "$driver"
brake: "$brake"
---

# /ouroboros:auto
Expand All @@ -30,6 +32,7 @@ is unavailable. A manual fallback is not an `ooo auto` run.
ooo auto "Build a local-first habit tracker CLI"
ooo auto --resume auto_abc123
ooo auto "Build a local-first habit tracker CLI" --skip-run
ooo auto "Build a local-first habit tracker CLI" --driver hermes --brake on
/ouroboros:auto "Build a local-first habit tracker CLI"
```

Expand All @@ -42,3 +45,10 @@ ooo auto "Build a local-first habit tracker CLI" --skip-run
5. Starts execution only after A-grade.

The pipeline must not hang indefinitely: all loops are bounded and timeout failures return a resumable `auto_session_id`. Resume with `ooo auto --resume <auto_session_id>`. Use `--skip-run` to stop after the A-grade Seed. The CLI-only `--show-ledger` flag prints assumptions/non-goals; MCP skill responses already include the same ledger summary when available.

When invoked through the interactive CLI without `--driver` and without a
configured default driver, `ooo auto` checks whether any supported driver CLI
is installed; if one is, it asks whether to use it as the interview driver.
Declining that prompt — or having no driver CLI installed — keeps the
deterministic auto answerer. Use `--driver <backend>` to select a driver
explicitly, and `--brake on|off` to control whether risky driver answers block
for approval.
1 change: 1 addition & 0 deletions src/ouroboros/auto/answerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class AutoAnswerSource(StrEnum):
EXISTING_CONVENTION = "existing_convention"
CONSERVATIVE_DEFAULT = "conservative_default"
ASSUMPTION = "assumption"
DRIVER = "driver"
NON_GOAL = "non_goal"
BLOCKER = "blocker"

Expand Down
268 changes: 268 additions & 0 deletions src/ouroboros/auto/driver_answerer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
"""Selected-driver interview answering for ``ooo auto``."""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
import re
from typing import Protocol

from ouroboros.auto.answerer import (
AutoAnswer,
AutoAnswerContext,
AutoAnswerer,
AutoAnswerSource,
AutoBlocker,
)
from ouroboros.auto.ledger import LedgerEntry, LedgerSource, LedgerStatus, SeedDraftLedger
from ouroboros.auto.state import AutoBrakeMode
from ouroboros.providers.base import CompletionConfig, LLMAdapter, Message, MessageRole
from ouroboros.providers.factory import create_llm_adapter, resolve_llm_backend


class AsyncAutoAnswerer(Protocol):
    """Structural interface for answerers whose ``answer`` step is a coroutine.

    ``answer`` drafts the reply asynchronously (e.g. by calling an LLM driver),
    while ``apply`` stays synchronous because ledger mutation awaits nothing.
    """

    async def answer(
        self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext | None = None
    ) -> AutoAnswer:
        """Draft an answer for one interview question."""

    def apply(self, answer: AutoAnswer, ledger: SeedDraftLedger, *, question: str) -> None:
        """Apply ledger updates associated with an answer."""


@dataclass(slots=True)
class DriverAutoAnswerer:
    """Ask the selected ``llm.backend`` driver to answer every interview question.

    The existing deterministic ``AutoAnswerer`` is still used as a ledger/risk
    scaffold, but the text sent back to the interview backend comes from the
    selected driver. With brake=on, high-impact/risky drafts become approval
    blockers. With brake=off, they are sent automatically with assumption and
    provenance tags so the later Seed-ready/A-grade gates remain the safety net.
    """

    # Driver backend name; normalized by resolve_llm_backend() in __post_init__.
    backend: str | None = None
    # Approval gate: ON turns risky drafts into blockers, OFF auto-sends them.
    brake: AutoBrakeMode = AutoBrakeMode.ON
    # Working directory handed to the LLM adapter (None = adapter default).
    cwd: str | Path | None = None
    # Lazily created on first answer(); injectable for tests.
    adapter: LLMAdapter | None = None
    # Deterministic answerer that provides the scaffold answer and ledger updates.
    baseline: AutoAnswerer = field(default_factory=AutoAnswerer)
    # Per-completion timeout forwarded to create_llm_adapter().
    timeout_seconds: float | None = 60.0

    def __post_init__(self) -> None:
        # Resolve the (possibly None) backend into a concrete backend name.
        self.backend = resolve_llm_backend(self.backend)

    async def answer(
        self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext | None = None
    ) -> AutoAnswer:
        """Return the selected driver's answer for ``question``.

        Flow: draft a deterministic scaffold answer, classify its risk, gate on
        the brake mode, then ask the driver LLM for the outgoing text. Driver
        failures and empty replies are surfaced as blockers rather than silent
        fallbacks.
        """
        scaffold = self.baseline.answer(question, ledger, context)
        risk = classify_interview_answer_risk(question, scaffold)
        if risk and self.brake == AutoBrakeMode.ON:
            # brake=on: never auto-send a risky answer; block until approved.
            reason = f"brake on: risky auto interview answer requires approval ({risk})"
            return AutoAnswer(
                text=f"Cannot send automatically without approval: {risk}",
                source=AutoAnswerSource.BLOCKER,
                confidence=1.0,
                blocker=AutoBlocker(reason=reason, question=question),
            )

        if self.adapter is None:
            # NOTE(review): None appears to mean "backend-default tool set"
            # for hermes while [] disables tools for every other backend —
            # confirm against create_llm_adapter.
            allowed_tools: list[str] | None = None if self.backend == "hermes" else []
            self.adapter = create_llm_adapter(
                backend=self.backend,
                use_case="interview",
                cwd=self.cwd,
                allowed_tools=allowed_tools,
                max_turns=1,
                timeout=self.timeout_seconds,
            )
        assert self.adapter is not None
        prompt = _driver_prompt(
            question, ledger, scaffold, backend=self.backend or "driver", risk=risk
        )
        result = await self.adapter.complete(
            messages=[Message(role=MessageRole.USER, content=prompt)],
            config=CompletionConfig(
                model="default",
                temperature=0.2,
                max_tokens=700,
                role="auto_interview_answer",
                max_turns=1,
            ),
        )
        if not result.is_ok:
            # Driver call failed: return a blocker so the run pauses resumably.
            return AutoAnswer(
                text=f"Cannot obtain driver answer: {result.error}",
                source=AutoAnswerSource.BLOCKER,
                confidence=1.0,
                blocker=AutoBlocker(
                    reason=f"selected driver {self.backend} failed to answer: {result.error}",
                    question=question,
                ),
            )
        text = _clean_driver_text(result.value.content)
        if not text:
            # An empty driver reply is treated the same as a failed call.
            return AutoAnswer(
                text="Cannot obtain driver answer: empty response",
                source=AutoAnswerSource.BLOCKER,
                confidence=1.0,
                blocker=AutoBlocker(
                    reason=f"selected driver {self.backend} returned an empty answer",
                    question=question,
                ),
            )

        assumptions = list(scaffold.assumptions)
        # Driver text is less trustworthy than the deterministic scaffold:
        # cap confidence, and cap it harder when the answer was flagged risky.
        confidence = min(scaffold.confidence, 0.82)
        if risk:
            # brake=off path: record the auto-sent risk as an explicit assumption.
            assumptions.append(f"brake off auto-sent risky driver answer: {risk}")
            confidence = min(confidence, 0.62)
        tagged_text = _tag_driver_text(
            text, backend=self.backend or "driver", brake=self.brake, risk=risk
        )
        return AutoAnswer(
            text=tagged_text,
            source=AutoAnswerSource.DRIVER,
            confidence=confidence,
            ledger_updates=_ledger_updates_for(
                scaffold,
                driver_text=tagged_text,
                risk=risk,
                backend=self.backend or "driver",
            ),
            assumptions=assumptions,
            non_goals=list(scaffold.non_goals),
        )

    def apply(self, answer: AutoAnswer, ledger: SeedDraftLedger, *, question: str) -> None:
        """Apply a selected-driver answer to the ledger via the baseline answerer."""
        self.baseline.apply(answer, ledger, question=question)


def classify_interview_answer_risk(question: str, scaffold: AutoAnswer | None = None) -> str | None:
    """Return a risk label when an interview answer should be approval-gated.

    Precedence: an explicit scaffold blocker wins, then keyword heuristics on
    the question text, then a low-confidence fallback; ``None`` means safe.
    """
    if scaffold is not None and scaffold.blocker is not None:
        return scaffold.blocker.reason

    normalized = question.lower()
    heuristics: tuple[tuple[str, str], ...] = (
        (
            r"\b(legal|privacy|pii|gdpr|hipaa|compliance|security|credential|secret|token|api key|password)\b",
            "legal/privacy/security/compliance",
        ),
        (
            r"\b(delete|destroy|drop|wipe|remove|irreversible|production|prod|deploy|billing|charge|payment|financial)\b",
            "destructive or financial/production choice",
        ),
        (
            r"\b(add|expand|new acceptance|scope|trade[- ]?off|pricing|business|product decision)\b",
            "scope or product/business tradeoff",
        ),
        (
            r"\b(prefer|preference|always|never)\b.*\b(user|customer|stakeholder)\b",
            "unknown user preference",
        ),
    )
    matched = next(
        (label for pattern, label in heuristics if re.search(pattern, normalized)),
        None,
    )
    if matched is not None:
        return matched

    if scaffold is not None and scaffold.confidence < 0.65:
        return "low-confidence high-impact answer"
    return None


def _driver_prompt(
    question: str,
    ledger: SeedDraftLedger,
    scaffold: AutoAnswer,
    *,
    backend: str,
    risk: str | None,
) -> str:
    """Build the single-turn prompt sent to the selected driver backend."""
    open_gaps = ", ".join(ledger.open_gaps()) or "none"
    if risk:
        risk_line = f"Risk label: {risk}."
    else:
        risk_line = "Risk label: none."
    prompt = f"""You are the selected ooo auto interview driver: {backend}.
Answer the Ouroboros Socratic interview question on behalf of the user.

Rules:
- Answer directly and concisely in 1-4 sentences.
- Preserve the user's goal and avoid inventing user preferences.
- If you make an assumption, state it explicitly.
- Do not ask a follow-up question; this auto mode must answer every interview question.
- Existing auto pipeline, Seed-ready checks, and A-grade review continue after your answer.

Current goal: {_ledger_goal(ledger)}
Open ledger gaps: {open_gaps}
Deterministic scaffold answer: {scaffold.text}
{risk_line}

Interview question:
{question}
"""
    return prompt.strip()


def _ledger_goal(ledger: SeedDraftLedger) -> str:
entries = ledger.sections.get("goal").entries if "goal" in ledger.sections else []
for entry in reversed(entries):
if entry.value.strip():
return entry.value.strip()
return ""


def _clean_driver_text(text: str) -> str:
text = text.strip()
if text.startswith("```") and text.endswith("```"):
text = text.strip("`").strip()
return text


def _tag_driver_text(text: str, *, backend: str, brake: AutoBrakeMode, risk: str | None) -> str:
tags = [f"driver={backend}", f"brake={brake.value}"]
if risk:
tags.append(f"risk={risk}")
return f"[{' ; '.join(tags)}] {text}"


def _ledger_updates_for(
    scaffold: AutoAnswer, *, driver_text: str, risk: str | None, backend: str
) -> list[tuple[str, LedgerEntry]]:
    """Re-emit the scaffold's ledger updates with driver provenance attached.

    Each scaffold entry is copied with a confidence cap, a provenance rationale,
    and the driver recorded as evidence; under brake=off a risk marker entry is
    appended for the later review gates.
    """
    # Shared rationale: the driver's text went to the interview, but the
    # structured ledger keeps the deterministic scaffold values.
    provenance_rationale = (
        "Selected-driver answer was sent to the interview; structured ledger "
        "state preserves the deterministic scaffold to avoid collapsing "
        f"section-specific contracts. Driver answer was: {driver_text}"
    )
    rewritten: list[tuple[str, LedgerEntry]] = []
    for section, entry in scaffold.ledger_updates:
        copied = LedgerEntry(
            key=entry.key,
            value=entry.value,
            source=entry.source,
            confidence=min(entry.confidence, 0.72),
            status=entry.status,
            reversible=entry.reversible,
            rationale=provenance_rationale,
            evidence=[*entry.evidence, f"driver:{backend}"],
        )
        rewritten.append((section, copied))
    if risk:
        risk_entry = LedgerEntry(
            key=f"risk.auto_driver.{_slug_key(risk)}",
            value=f"Driver {backend} auto-sent a risky interview answer under brake=off: {risk}",
            source=LedgerSource.ASSUMPTION,
            confidence=0.6,
            status=LedgerStatus.INFERRED,
            rationale="Risk was preserved as provenance for Seed-ready and A-grade review gates.",
        )
        rewritten.append(("constraints", risk_entry))
    return rewritten


def _slug_key(value: str) -> str:
return re.sub(r"[^a-z0-9]+", "_", value.lower()).strip("_") or "risk"
17 changes: 13 additions & 4 deletions src/ouroboros/auto/interview_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import asyncio
from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
import inspect
import re
from typing import Protocol

Expand Down Expand Up @@ -127,7 +128,7 @@ async def run(self, state: AutoPipelineState, ledger: SeedDraftLedger) -> AutoIn
state.mark_progress(f"interview round {round_number}/{self.max_rounds}")
self._save(state)

answer = self._answer_with_gap_steering(turn.question, ledger, answer_context)
answer = await self._answer_with_gap_steering(turn.question, ledger, answer_context)
if answer.blocker is not None:
self.answerer.apply(answer, ledger, question=turn.question)
state.ledger = ledger.to_dict()
Expand Down Expand Up @@ -193,10 +194,10 @@ async def run(self, state: AutoPipelineState, ledger: SeedDraftLedger) -> AutoIn
"blocked", state.interview_session_id, ledger, self.max_rounds, blocker
)

def _answer_with_gap_steering(
async def _answer_with_gap_steering(
self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext
) -> AutoAnswer:
answer = self.answerer.answer(question, ledger, context)
answer = await self._answer(question, ledger, context)
if answer.blocker is not None:
return answer
gaps = self.gap_detector.detect(ledger)
Expand All @@ -219,7 +220,15 @@ def _answer_with_gap_steering(
confidence=1.0,
blocker=blocker,
)
return self.answerer.answer(_gap_prompt(next_gap), ledger, context)
return await self._answer(_gap_prompt(next_gap), ledger, context)

    async def _answer(
        self, question: str, ledger: SeedDraftLedger, context: AutoAnswerContext
    ) -> AutoAnswer:
        # Bridge sync and async answerers: the deterministic AutoAnswerer
        # returns an AutoAnswer directly, while driver-backed answerers return
        # an awaitable that must be resolved before use.
        answer = self.answerer.answer(question, ledger, context)
        if inspect.isawaitable(answer):
            answer = await answer
        return answer

def _handle_completed_turn(
self, state: AutoPipelineState, ledger: SeedDraftLedger, turn: InterviewTurn, rounds: int
Expand Down
Loading