entireio · peyton-alt · May 12, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
@@ -12,6 +12,7 @@
 package review
 
 import (
+	"errors"
 	"fmt"
 	"io"
 	"strings"
@@ -63,15 +64,20 @@ func (s DumpSink) dumpAgent(run reviewtypes.AgentRun) {
 		// agent-level RunError events the parser emitted (typically a torn
 		// stdout stream — caught at the orchestrator level by classifyStatus
 		// even when the process itself exited 0).
-		if run.Err != nil {
-			fmt.Fprintf(&b, "**Failed:** `%v`\n\n", run.Err)
-		} else {
-			b.WriteString("**Failed**\n\n")
-		}
+		writeFailureHeader(&b, run.Err)
 		for _, ev := range run.Buffer {
-			if re, ok := ev.(reviewtypes.RunError); ok && re.Err != nil {
-				fmt.Fprintf(&b, "> agent error: `%v`\n\n", re.Err)
+			re, ok := ev.(reviewtypes.RunError)
+			if !ok || re.Err == nil {
+				continue
+			}
+			// Skip the synthetic RunError that the orchestrator emits to drive
+			// live TUI updates: it carries the same error as run.Err, which
+			// the failure header already rendered. Without this the same
+			// error text appears twice in adjacent output blocks.
+			if run.Err != nil && errors.Is(re.Err, run.Err) {
+				continue
 			}
+			fmt.Fprintf(&b, "> agent error: `%v`\n\n", re.Err)
 		}
 		// Render any narrative text the agent produced before the failure
 		// surfaced — useful when the parser tore mid-response so reviewers
@@ -97,6 +103,26 @@ func (s DumpSink) dumpAgent(run reviewtypes.AgentRun) {
 	fmt.Fprint(s.W, rendered)
 }
 
+// writeFailureHeader formats the failure block for a failed agent run.
+// When run.Err is a *ProcessError carrying captured stderr, the stderr is
+// rendered in a fenced code block on its own — preserving newlines so multi-line
+// agent CLI output (auth errors, stack traces, etc.) is readable instead of
+// collapsed inside an inline-code span. Generic errors (non-ProcessError, or
+// ProcessError without stderr) keep the inline backtick rendering.
+func writeFailureHeader(b *strings.Builder, runErr error) {
+	if runErr == nil {
+		b.WriteString("**Failed**\n\n")
+		return
+	}
+	var pe *reviewtypes.ProcessError
+	if errors.As(runErr, &pe) && pe.Stderr != "" {
+		fmt.Fprintf(b, "**Failed:** `%s` exited (`%v`). Stderr:\n\n", pe.AgentName, pe.Err)
+		fmt.Fprintf(b, "```\n%s\n```\n\n", pe.Stderr)
+		return
+	}
+	fmt.Fprintf(b, "**Failed:** `%v`\n\n", runErr)
+}
+
 // joinAssistantText extracts AssistantText events from a buffer and joins
 // them with newlines, trimming the result to keep dump output tight.
 func joinAssistantText(buf []reviewtypes.Event) string {

@@ -91,6 +91,89 @@ func TestDumpSink_FailedAgentNoErr(t *testing.T) {
 	}
 }
 
+func TestDumpSink_FailedAgentWithProcessErrorRendersStderrAsCodeFence(t *testing.T) {
+	t.Parallel()
+	var buf bytes.Buffer
+	sink := DumpSink{W: &buf}
+
+	stderr := "Error: API key invalid\nPlease set ANTHROPIC_API_KEY\nHint: see /docs/auth"
+	pe := &reviewtypes.ProcessError{
+		AgentName: "claude-code",
+		Err:       errors.New("exit status 1"),
+		Stderr:    stderr,
+	}
+	run := reviewtypes.AgentRun{
+		Name:   "claude-code",
+		Status: reviewtypes.AgentStatusFailed,
+		Err:    pe,
+	}
+	sink.RunFinished(makeSummary(run))
+
+	out := buf.String()
+
+	// Failure header still mentions the exit error.
+	if !strings.Contains(out, "exit status 1") {
+		t.Errorf("expected exit status mention in failure header, got:\n%s", out)
+	}
+	// Each stderr line appears in output.
+	for _, line := range []string{
+		"Error: API key invalid",
+		"Please set ANTHROPIC_API_KEY",
+		"Hint: see /docs/auth",
+	} {
+		if !strings.Contains(out, line) {
+			t.Errorf("expected stderr line %q in output, got:\n%s", line, out)
+		}
+	}
+	// Stderr is rendered in a fenced code block (```), not crammed into a
+	// single inline-code span (`...`). The fence is the load-bearing fix:
+	// multi-line stderr inside backticks loses newlines under markdown rendering.
+	if !strings.Contains(out, "```") {
+		t.Errorf("expected fenced code block delimiting stderr, got:\n%s", out)
+	}
+	// The whole error is NOT crammed into one inline-code span. The current
+	// (broken) rendering produces a single backticked line containing the
+	// full stderr; the fix moves stderr into its own fenced block, so the
+	// header line itself does not contain "stderr:".
+	if strings.Contains(out, "**Failed:** `claude-code: exit status 1: stderr:") {
+		t.Errorf("stderr must not be jammed into the inline failure header, got:\n%s", out)
+	}
+}
+
+func TestDumpSink_DoesNotDoublePrintSyntheticRunErrorMatchingRunErr(t *testing.T) {
+	t.Parallel()
+	var buf bytes.Buffer
+	sink := DumpSink{W: &buf}
+
+	// The orchestrator emits a synthetic RunError carrying waitErr so the live
+	// TUI sees the failure mid-run. That same waitErr is also stored on
+	// AgentRun.Err and rendered in the failure header (fenced stderr block).
+	// The dump must not also render a "> agent error: <err>" blockquote for
+	// the synthetic event — that produces visible duplication where the same
+	// error text appears twice in adjacent output.
+	waitErr := errors.New("exit status 1")
+	run := reviewtypes.AgentRun{
+		Name:   "claude-code",
+		Status: reviewtypes.AgentStatusFailed,
+		Err:    waitErr,
+		Buffer: []reviewtypes.Event{
+			reviewtypes.Started{},
+			reviewtypes.Finished{Success: true},
+			reviewtypes.RunError{Err: waitErr}, // synthetic, same pointer as run.Err
+		},
+	}
+	sink.RunFinished(makeSummary(run))
+
+	out := buf.String()
+	if strings.Contains(out, "> agent error:") {
+		t.Errorf("synthetic RunError matching run.Err must not produce a blockquote, got:\n%s", out)
+	}
+	// Header should still be present.
+	if !strings.Contains(out, "**Failed:**") {
+		t.Errorf("expected failure header, got:\n%s", out)
+	}
+}
+
 func TestDumpSink_FailedAgentRunErrorEvent(t *testing.T) {
 	t.Parallel()
 	var buf bytes.Buffer

@@ -82,6 +82,23 @@ func Run(
 
 	waitErr := proc.Wait()
 	finished := time.Now()
+	// Emit a synthetic RunError carrying waitErr so sinks see the failure
+	// during the live event stream rather than only via RunFinished. Without
+	// this, the parser's typical clean-EOF Finished{Success:true} would leave
+	// the TUI dashboard showing "✓ done" until the orchestrator's summary
+	// arrives — for multi-agent runs that's the entire duration of the other
+	// agents, not just a flicker.
+	if waitErr != nil {
+		synthEvent := reviewtypes.RunError{Err: waitErr}
+		buffer = append(buffer, synthEvent)
+		sawRunError = true
+		if firstRunErr == nil {
+			firstRunErr = waitErr
+		}
+		for _, sink := range sinks {
+			sink.AgentEvent(reviewer.Name(), synthEvent)
+		}
+	}
 	status := classifyStatus(ctx, waitErr, eventOutcome{finishedSeen: finishedSeen, finishedOk: finishedOk, sawRunError: sawRunError})
 	runErr := waitErr
 	if runErr == nil && status == reviewtypes.AgentStatusFailed {

@@ -131,8 +131,18 @@ func RunMulti(
 			for ev := range p.Events() {
 				fanIn <- taggedEvent{agentIdx: idx, ev: ev}
 			}
-			states[idx].waitErr = p.Wait()
+			waitErr := p.Wait()
+			states[idx].waitErr = waitErr
 			states[idx].finishedAt = time.Now()
+			// Emit a synthetic RunError so sinks (TUI dashboard, dump) see
+			// the failure live rather than only via RunFinished. Otherwise
+			// the parser's clean-EOF Finished{Success:true} leaves a row
+			// showing "✓ done" even when the agent process exited non-zero,
+			// for the entire remaining duration of any other still-running
+			// agents.
+			if waitErr != nil {
+				fanIn <- taggedEvent{agentIdx: idx, ev: reviewtypes.RunError{Err: waitErr}}
+			}
 		}(i, proc)
 	}
 

@@ -78,9 +78,12 @@ func TestRunMulti_OneSucceedsOneFails(t *testing.T) {
 	if len(summary.AgentRuns) != 2 {
 		t.Fatalf("expected 2 AgentRuns, got %d", len(summary.AgentRuns))
 	}
-	// Both agents delivered events to the sink.
-	if len(rec.agentEvents) != 4 {
-		t.Errorf("expected 4 AgentEvent calls (2 per agent), got %d", len(rec.agentEvents))
+	// Both agents delivered events to the sink. ok-agent emits 2 events
+	// (Started, Finished); fail-agent emits 2 events (Started, Finished)
+	// plus a synthetic RunError emitted by the orchestrator after Wait()
+	// returns the non-nil exit error — total 5.
+	if len(rec.agentEvents) != 5 {
+		t.Errorf("expected 5 AgentEvent calls (ok: 2, fail: 2 + synthetic RunError), got %d", len(rec.agentEvents))
 	}
 	// Verify per-agent statuses.
 	statusFor := func(name string) reviewtypes.AgentStatus {
@@ -348,6 +351,55 @@ func TestRunMulti_SinkFanOut(t *testing.T) {
 	}
 }
 
+// TestRunMulti_EmitsSyntheticRunErrorWhenAgentWaitErrIsNonNil verifies that
+// when an individual agent's process exits non-zero, the orchestrator emits
+// a RunError event into the live sink stream — not only into the final
+// summary. This is what lets the TUI dashboard flip a row from "✓ done"
+// (from the parser's clean-EOF Finished{Success:true}) to "✗ failed" while
+// other agents are still running. Without this synthetic emission, multi-agent
+// runs would show stale "succeeded" status for the entire duration of any
+// later-finishing agents.
+func TestRunMulti_EmitsSyntheticRunErrorWhenAgentWaitErrIsNonNil(t *testing.T) {
+	t.Parallel()
+	failingWait := errors.New("exit status 1: stderr: invalid_api_key")
+	failer := &stubReviewer{
+		name: "claude-code",
+		events: []reviewtypes.Event{
+			reviewtypes.Started{},
+			reviewtypes.Finished{Success: true}, // parser saw clean EOF
+		},
+		waitErr: failingWait,
+	}
+	succeeder := &stubReviewer{
+		name: "codex",
+		events: []reviewtypes.Event{
+			reviewtypes.Started{},
+			reviewtypes.AssistantText{Text: "looks good"},
+			reviewtypes.Finished{Success: true},
+		},
+	}
+	rec := &stubSinkRecorder{}
+
+	_, err := RunMulti(context.Background(), []reviewtypes.AgentReviewer{failer, succeeder}, reviewtypes.RunConfig{}, []reviewtypes.Sink{rec})
+	if err == nil {
+		t.Fatal("expected non-nil firstErr from failing agent")
+	}
+
+	var found bool
+	for _, evt := range rec.agentEvents {
+		if evt.agent != "claude-code" {
+			continue
+		}
+		if re, ok := evt.ev.(reviewtypes.RunError); ok && re.Err != nil && re.Err.Error() == failingWait.Error() {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Errorf("expected synthetic RunError for failing agent in live sink stream, got events: %+v", rec.agentEvents)
+	}
+}
+
 // TestRunMulti_TokenTracking verifies Tokens overwrite semantics in multi-agent
 // runs: the last Tokens event per agent supersedes earlier ones.
 func TestRunMulti_TokenTracking(t *testing.T) {

@@ -342,6 +342,72 @@ func TestRun_TokenTracking(t *testing.T) {
 	}
 }
 
+func TestRun_EmitsSyntheticRunErrorWhenWaitErrIsNonNil(t *testing.T) {
+	t.Parallel()
+	// Common case: parser reads agent's stdout to clean EOF (no scanner error)
+	// and emits Finished{Success:true} — but the agent process actually exited
+	// non-zero with stderr. The waitErr captures that. Without a synthetic
+	// RunError emitted post-Wait, sinks (TUI, dump) only learn about the
+	// failure via RunFinished — too late for the live dashboard to flip from
+	// "✓ done" to "✗ failed" while other agents are still running.
+	waitErr := errors.New("exit status 1: stderr: invalid_api_key")
+	reviewer := &stubReviewer{
+		name: "claude-code",
+		events: []reviewtypes.Event{
+			reviewtypes.Started{},
+			reviewtypes.Finished{Success: true},
+		},
+		waitErr: waitErr,
+	}
+	rec := &stubSinkRecorder{}
+
+	_, err := Run(context.Background(), reviewer, reviewtypes.RunConfig{}, []reviewtypes.Sink{rec})
+	if err == nil {
+		t.Fatal("expected non-nil error from failing run")
+	}
+
+	// Look for a RunError event in the live stream (forwarded to sinks via
+	// AgentEvent) carrying the wait error.
+	var found bool
+	for _, evt := range rec.agentEvents {
+		if re, ok := evt.ev.(reviewtypes.RunError); ok && re.Err != nil && re.Err.Error() == waitErr.Error() {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Errorf("expected synthetic RunError(waitErr) in live sink stream after Wait, got events: %+v", rec.agentEvents)
+	}
+}
+
+func TestRun_DoesNotEmitSyntheticRunErrorOnCleanExit(t *testing.T) {
+	t.Parallel()
+	// Happy path: process exits 0, parser sees clean EOF. No synthetic
+	// RunError should appear — the existing Finished{Success:true} is the
+	// authoritative signal.
+	reviewer := &stubReviewer{
+		name: "claude-code",
+		events: []reviewtypes.Event{
+			reviewtypes.Started{},
+			reviewtypes.AssistantText{Text: "looks good"},
+			reviewtypes.Finished{Success: true},
+		},
+		waitErr: nil,
+	}
+	rec := &stubSinkRecorder{}
+
+	_, err := Run(context.Background(), reviewer, reviewtypes.RunConfig{}, []reviewtypes.Sink{rec})
+	if err != nil {
+		t.Fatalf("expected nil error on clean exit, got %v", err)
+	}
+
+	for _, evt := range rec.agentEvents {
+		if _, ok := evt.ev.(reviewtypes.RunError); ok {
+			t.Errorf("clean exit should not produce a synthetic RunError, got: %+v", evt.ev)
+		}
+	}
+}
+
 func TestRun_SinkFanOut(t *testing.T) {
 	t.Parallel()
 	events := []reviewtypes.Event{