Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions cmd/entire/cli/review/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
package review

import (
"errors"
"fmt"
"io"
"strings"
Expand Down Expand Up @@ -63,15 +64,20 @@ func (s DumpSink) dumpAgent(run reviewtypes.AgentRun) {
// agent-level RunError events the parser emitted (typically a torn
// stdout stream — caught at the orchestrator level by classifyStatus
// even when the process itself exited 0).
if run.Err != nil {
fmt.Fprintf(&b, "**Failed:** `%v`\n\n", run.Err)
} else {
b.WriteString("**Failed**\n\n")
}
writeFailureHeader(&b, run.Err)
for _, ev := range run.Buffer {
if re, ok := ev.(reviewtypes.RunError); ok && re.Err != nil {
fmt.Fprintf(&b, "> agent error: `%v`\n\n", re.Err)
re, ok := ev.(reviewtypes.RunError)
if !ok || re.Err == nil {
continue
}
// Skip the synthetic RunError that the orchestrator emits to drive
// live TUI updates: it carries the same error as run.Err, which
// the failure header already rendered. Without this the same
// error text appears twice in adjacent output blocks.
if run.Err != nil && errors.Is(re.Err, run.Err) {
continue
}
fmt.Fprintf(&b, "> agent error: `%v`\n\n", re.Err)
}
// Render any narrative text the agent produced before the failure
// surfaced — useful when the parser tore mid-response so reviewers
Expand All @@ -97,6 +103,26 @@ func (s DumpSink) dumpAgent(run reviewtypes.AgentRun) {
fmt.Fprint(s.W, rendered)
}

// writeFailureHeader formats the failure block for a failed agent run.
// When run.Err is a *ProcessError carrying captured stderr, the stderr is
// rendered in a fenced code block on its own — preserving newlines so multi-line
// agent CLI output (auth errors, stack traces, etc.) is readable instead of
// collapsed inside an inline-code span. Generic errors (non-ProcessError, or
// ProcessError without stderr) keep the inline backtick rendering.
func writeFailureHeader(b *strings.Builder, runErr error) {
if runErr == nil {
b.WriteString("**Failed**\n\n")
return
}
var pe *reviewtypes.ProcessError
if errors.As(runErr, &pe) && pe.Stderr != "" {
fmt.Fprintf(b, "**Failed:** `%s` exited (`%v`). Stderr:\n\n", pe.AgentName, pe.Err)
fmt.Fprintf(b, "```\n%s\n```\n\n", pe.Stderr)
return
}
fmt.Fprintf(b, "**Failed:** `%v`\n\n", runErr)
}

// joinAssistantText extracts AssistantText events from a buffer and joins
// them with newlines, trimming the result to keep dump output tight.
func joinAssistantText(buf []reviewtypes.Event) string {
Expand Down
83 changes: 83 additions & 0 deletions cmd/entire/cli/review/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,89 @@ func TestDumpSink_FailedAgentNoErr(t *testing.T) {
}
}

func TestDumpSink_FailedAgentWithProcessErrorRendersStderrAsCodeFence(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
sink := DumpSink{W: &buf}

stderr := "Error: API key invalid\nPlease set ANTHROPIC_API_KEY\nHint: see /docs/auth"
pe := &reviewtypes.ProcessError{
AgentName: "claude-code",
Err: errors.New("exit status 1"),
Stderr: stderr,
}
run := reviewtypes.AgentRun{
Name: "claude-code",
Status: reviewtypes.AgentStatusFailed,
Err: pe,
}
sink.RunFinished(makeSummary(run))

out := buf.String()

// Failure header still mentions the exit error.
if !strings.Contains(out, "exit status 1") {
t.Errorf("expected exit status mention in failure header, got:\n%s", out)
}
// Each stderr line appears in output.
for _, line := range []string{
"Error: API key invalid",
"Please set ANTHROPIC_API_KEY",
"Hint: see /docs/auth",
} {
if !strings.Contains(out, line) {
t.Errorf("expected stderr line %q in output, got:\n%s", line, out)
}
}
// Stderr is rendered in a fenced code block (```), not crammed into a
// single inline-code span (`...`). The fence is the load-bearing fix:
// multi-line stderr inside backticks loses newlines under markdown rendering.
if !strings.Contains(out, "```") {
t.Errorf("expected fenced code block delimiting stderr, got:\n%s", out)
}
// The whole error is NOT crammed into one inline-code span. The current
// (broken) rendering produces a single backticked line containing the
// full stderr; the fix moves stderr into its own fenced block, so the
// header line itself does not contain "stderr:".
if strings.Contains(out, "**Failed:** `claude-code: exit status 1: stderr:") {
t.Errorf("stderr must not be jammed into the inline failure header, got:\n%s", out)
}
}

func TestDumpSink_DoesNotDoublePrintSyntheticRunErrorMatchingRunErr(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
sink := DumpSink{W: &buf}

// The orchestrator emits a synthetic RunError carrying waitErr so the live
// TUI sees the failure mid-run. That same waitErr is also stored on
// AgentRun.Err and rendered in the failure header (fenced stderr block).
// The dump must not also render a "> agent error: <err>" blockquote for
// the synthetic event — that produces visible duplication where the same
// error text appears twice in adjacent output.
waitErr := errors.New("exit status 1")
run := reviewtypes.AgentRun{
Name: "claude-code",
Status: reviewtypes.AgentStatusFailed,
Err: waitErr,
Buffer: []reviewtypes.Event{
reviewtypes.Started{},
reviewtypes.Finished{Success: true},
reviewtypes.RunError{Err: waitErr}, // synthetic, same pointer as run.Err
},
}
sink.RunFinished(makeSummary(run))

out := buf.String()
if strings.Contains(out, "> agent error:") {
t.Errorf("synthetic RunError matching run.Err must not produce a blockquote, got:\n%s", out)
}
// Header should still be present.
if !strings.Contains(out, "**Failed:**") {
t.Errorf("expected failure header, got:\n%s", out)
}
}

func TestDumpSink_FailedAgentRunErrorEvent(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
Expand Down
17 changes: 17 additions & 0 deletions cmd/entire/cli/review/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,23 @@ func Run(

waitErr := proc.Wait()
finished := time.Now()
// Emit a synthetic RunError carrying waitErr so sinks see the failure
// during the live event stream rather than only via RunFinished. Without
// this, the parser's typical clean-EOF Finished{Success:true} would leave
// the TUI dashboard showing "✓ done" until the orchestrator's summary
// arrives — for multi-agent runs that's the entire duration of the other
// agents, not just a flicker.
if waitErr != nil {
Comment thread
peyton-alt marked this conversation as resolved.
Outdated
synthEvent := reviewtypes.RunError{Err: waitErr}
buffer = append(buffer, synthEvent)
sawRunError = true
if firstRunErr == nil {
firstRunErr = waitErr
}
for _, sink := range sinks {
sink.AgentEvent(reviewer.Name(), synthEvent)
}
}
status := classifyStatus(ctx, waitErr, eventOutcome{finishedSeen: finishedSeen, finishedOk: finishedOk, sawRunError: sawRunError})
runErr := waitErr
if runErr == nil && status == reviewtypes.AgentStatusFailed {
Expand Down
12 changes: 11 additions & 1 deletion cmd/entire/cli/review/run_multi.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,18 @@ func RunMulti(
for ev := range p.Events() {
fanIn <- taggedEvent{agentIdx: idx, ev: ev}
}
states[idx].waitErr = p.Wait()
waitErr := p.Wait()
states[idx].waitErr = waitErr
states[idx].finishedAt = time.Now()
// Emit a synthetic RunError so sinks (TUI dashboard, dump) see
// the failure live rather than only via RunFinished. Otherwise
// the parser's clean-EOF Finished{Success:true} leaves a row
// showing "✓ done" even when the agent process exited non-zero,
// for the entire remaining duration of any other still-running
// agents.
if waitErr != nil {
Comment thread
peyton-alt marked this conversation as resolved.
Outdated
fanIn <- taggedEvent{agentIdx: idx, ev: reviewtypes.RunError{Err: waitErr}}
}
}(i, proc)
}

Expand Down
58 changes: 55 additions & 3 deletions cmd/entire/cli/review/run_multi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,12 @@ func TestRunMulti_OneSucceedsOneFails(t *testing.T) {
if len(summary.AgentRuns) != 2 {
t.Fatalf("expected 2 AgentRuns, got %d", len(summary.AgentRuns))
}
// Both agents delivered events to the sink.
if len(rec.agentEvents) != 4 {
t.Errorf("expected 4 AgentEvent calls (2 per agent), got %d", len(rec.agentEvents))
// Both agents delivered events to the sink. ok-agent emits 2 events
// (Started, Finished); fail-agent emits 2 events (Started, Finished)
// plus a synthetic RunError emitted by the orchestrator after Wait()
// returns the non-nil exit error — total 5.
if len(rec.agentEvents) != 5 {
t.Errorf("expected 5 AgentEvent calls (ok: 2, fail: 2 + synthetic RunError), got %d", len(rec.agentEvents))
}
// Verify per-agent statuses.
statusFor := func(name string) reviewtypes.AgentStatus {
Expand Down Expand Up @@ -348,6 +351,55 @@ func TestRunMulti_SinkFanOut(t *testing.T) {
}
}

// TestRunMulti_EmitsSyntheticRunErrorWhenAgentWaitErrIsNonNil verifies that
// when an individual agent's process exits non-zero, the orchestrator emits
// a RunError event into the live sink stream — not only into the final
// summary. This is what lets the TUI dashboard flip a row from "✓ done"
// (from the parser's clean-EOF Finished{Success:true}) to "✗ failed" while
// other agents are still running. Without this synthetic emission, multi-agent
// runs would show stale "succeeded" status for the entire duration of any
// later-finishing agents.
func TestRunMulti_EmitsSyntheticRunErrorWhenAgentWaitErrIsNonNil(t *testing.T) {
t.Parallel()
failingWait := errors.New("exit status 1: stderr: invalid_api_key")
failer := &stubReviewer{
name: "claude-code",
events: []reviewtypes.Event{
reviewtypes.Started{},
reviewtypes.Finished{Success: true}, // parser saw clean EOF
},
waitErr: failingWait,
}
succeeder := &stubReviewer{
name: "codex",
events: []reviewtypes.Event{
reviewtypes.Started{},
reviewtypes.AssistantText{Text: "looks good"},
reviewtypes.Finished{Success: true},
},
}
rec := &stubSinkRecorder{}

_, err := RunMulti(context.Background(), []reviewtypes.AgentReviewer{failer, succeeder}, reviewtypes.RunConfig{}, []reviewtypes.Sink{rec})
if err == nil {
t.Fatal("expected non-nil firstErr from failing agent")
}

var found bool
for _, evt := range rec.agentEvents {
if evt.agent != "claude-code" {
continue
}
if re, ok := evt.ev.(reviewtypes.RunError); ok && re.Err != nil && re.Err.Error() == failingWait.Error() {
found = true
break
}
}
if !found {
t.Errorf("expected synthetic RunError for failing agent in live sink stream, got events: %+v", rec.agentEvents)
}
}

// TestRunMulti_TokenTracking verifies Tokens overwrite semantics in multi-agent
// runs: the last Tokens event per agent supersedes earlier ones.
func TestRunMulti_TokenTracking(t *testing.T) {
Expand Down
66 changes: 66 additions & 0 deletions cmd/entire/cli/review/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,72 @@ func TestRun_TokenTracking(t *testing.T) {
}
}

func TestRun_EmitsSyntheticRunErrorWhenWaitErrIsNonNil(t *testing.T) {
t.Parallel()
// Common case: parser reads agent's stdout to clean EOF (no scanner error)
// and emits Finished{Success:true} — but the agent process actually exited
// non-zero with stderr. The waitErr captures that. Without a synthetic
// RunError emitted post-Wait, sinks (TUI, dump) only learn about the
// failure via RunFinished — too late for the live dashboard to flip from
// "✓ done" to "✗ failed" while other agents are still running.
waitErr := errors.New("exit status 1: stderr: invalid_api_key")
reviewer := &stubReviewer{
name: "claude-code",
events: []reviewtypes.Event{
reviewtypes.Started{},
reviewtypes.Finished{Success: true},
},
waitErr: waitErr,
}
rec := &stubSinkRecorder{}

_, err := Run(context.Background(), reviewer, reviewtypes.RunConfig{}, []reviewtypes.Sink{rec})
if err == nil {
t.Fatal("expected non-nil error from failing run")
}

// Look for a RunError event in the live stream (forwarded to sinks via
// AgentEvent) carrying the wait error.
var found bool
for _, evt := range rec.agentEvents {
if re, ok := evt.ev.(reviewtypes.RunError); ok && re.Err != nil && re.Err.Error() == waitErr.Error() {
found = true
break
}
}
if !found {
t.Errorf("expected synthetic RunError(waitErr) in live sink stream after Wait, got events: %+v", rec.agentEvents)
}
}

func TestRun_DoesNotEmitSyntheticRunErrorOnCleanExit(t *testing.T) {
t.Parallel()
// Happy path: process exits 0, parser sees clean EOF. No synthetic
// RunError should appear — the existing Finished{Success:true} is the
// authoritative signal.
reviewer := &stubReviewer{
name: "claude-code",
events: []reviewtypes.Event{
reviewtypes.Started{},
reviewtypes.AssistantText{Text: "looks good"},
reviewtypes.Finished{Success: true},
},
waitErr: nil,
}
rec := &stubSinkRecorder{}

_, err := Run(context.Background(), reviewer, reviewtypes.RunConfig{}, []reviewtypes.Sink{rec})
if err != nil {
t.Fatalf("expected nil error on clean exit, got %v", err)
}

for _, evt := range rec.agentEvents {
if _, ok := evt.ev.(reviewtypes.RunError); ok {
t.Errorf("clean exit should not produce a synthetic RunError, got: %+v", evt.ev)
}
}
}

func TestRun_SinkFanOut(t *testing.T) {
t.Parallel()
events := []reviewtypes.Event{
Expand Down
Loading
Loading