Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ The redesign eliminated several constructs from the prior implementation. None s
- `--track-only` flag (intentionally removed by #1009)
- `--postreview` / `--finalize` / empty review commits / `/entire-review:finish` skill installer
- `Launcher` + `HeadlessLauncher` as separate interfaces (single `AgentReviewer`)
- `filterCodexOutput` in shared multi-agent code (lives in codex's adapter)
- Codex chrome-line filtering or any agent-specific stdout post-processing in shared multi-agent code (per-agent parsers own their format; shared code only sees `Event` variants)
- `sync.Once`-guarded onCancel + parallel `signal.Notify` goroutine (single cancel from start)

#### Key Files
Expand All @@ -772,7 +772,7 @@ The redesign eliminated several constructs from the prior implementation. None s
- `cmd/entire/cli/review/synthesis_sink.go` / `synthesis_prompt.go` — opt-in cross-agent verdict
- `cmd/entire/cli/review/types/{reviewer,sink,template}.go` — interface contracts (CU2 + CU4 + CU5b)
- `cmd/entire/cli/review/env.go` — `ENTIRE_REVIEW_*` constants + `EncodeSkills`/`DecodeSkills` + `AppendReviewEnv`
- `cmd/entire/cli/agent/{claudecode,codex,geminicli}/reviewer.go` — per-agent `AgentReviewer` implementations (claude-code, codex with chrome filter, gemini-cli)
- `cmd/entire/cli/agent/{claudecode,codex,geminicli}/reviewer.go` — per-agent `AgentReviewer` implementations (claude-code, codex, gemini-cli)
- `cmd/entire/cli/agent/claudecode/discovery.go` — skill discovery + `pickLatestVersion` plugin-cache dedupe
- `cmd/entire/cli/lifecycle.go` — `adoptReviewEnv` reads `ENTIRE_REVIEW_*` from process env; replaces marker-file adoption
- `cmd/entire/cli/review_bridge.go` / `review_helpers.go` — bridge code in `cli` package for cycle-bound functions (`headHasReviewCheckpoint`, `launchableReviewerFor`, `newReviewAttachCmd`, `lazySynthesisProvider`)
Expand Down
93 changes: 81 additions & 12 deletions cmd/entire/cli/agent/claudecode/reviewer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package claudecode
import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"os"
Expand All @@ -12,11 +13,18 @@ import (
reviewtypes "github.com/entireio/cli/cmd/entire/cli/review/types"
)

// envelopeTypeAssistant is the stream-json envelope type for assistant
// messages (per-content-block events). Shared with transcript.go's usage.
const envelopeTypeAssistant = "assistant"

// NewReviewer returns the AgentReviewer for claude-code.
//
// Argv shape: claude -p <prompt>. Plain-text stdout.
// Argv shape: claude -p <prompt> --output-format stream-json --verbose.
// The prompt is passed as a command-line argument; stdin is unused.
// Stdout in -p mode is the assistant's plain-text response (no JSON envelope).
// Stdout is newline-delimited JSON envelopes (one event per line), which the
// parser decodes into the review Event stream. This format gives the parser
// per-message granularity (each assistant content block surfaces as it is
// produced) instead of buffering until end-of-run like plain-text -p mode.
func NewReviewer() *reviewtypes.ReviewerTemplate {
return &reviewtypes.ReviewerTemplate{
AgentName: "claude-code",
Expand All @@ -29,16 +37,21 @@ func NewReviewer() *reviewtypes.ReviewerTemplate {
// Exposed at package level for test inspection of argv and env.
func buildReviewCmd(ctx context.Context, cfg reviewtypes.RunConfig) *exec.Cmd {
prompt := review.ComposeReviewPrompt(cfg)
cmd := exec.CommandContext(ctx, "claude", "-p", prompt)
cmd := exec.CommandContext(ctx, "claude", "-p", prompt, "--output-format", "stream-json", "--verbose")
cmd.Env = review.AppendReviewEnv(os.Environ(), "claude-code", cfg, prompt)
return cmd
}

// parseClaudeOutput converts claude's -p mode stdout into a stream of Events.
// In -p mode claude emits the assistant's response as plain text (one line per
// stdout line). The parser emits Started once, then one AssistantText per
// non-empty line, then Finished{Success: true} on clean EOF or
// RunError + Finished{Success: false} on a torn stream (scanner error).
// parseClaudeOutput converts claude's --output-format stream-json --verbose
// stdout into a stream of Events. Each stdout line is one JSON envelope:
// - {"type":"system",...} session metadata / hooks; swallowed
// - {"type":"assistant",...} per content block: text → AssistantText,
// tool_use → ToolCall, thinking → swallowed
// - {"type":"user",...} tool_result echoes; swallowed
// - {"type":"result",...} final summary; emits Tokens then Finished
//
// Emits Started first, Finished{Success:...} last (success follows result.is_error).
// On a scanner error (torn stream), emits RunError then Finished{Success:false}.
//
// Exposed for golden-file contract testing.
func parseClaudeOutput(r io.Reader) <-chan reviewtypes.Event {
Expand All @@ -48,19 +61,75 @@ func parseClaudeOutput(r io.Reader) <-chan reviewtypes.Event {
out <- reviewtypes.Started{}
scanner := bufio.NewScanner(r)
scanner.Buffer(make([]byte, 1024*1024), 16*1024*1024)
var sawResult bool
var resultErr bool
var resultUsage messageUsage
for scanner.Scan() {
line := scanner.Text()
if line == "" {
line := scanner.Bytes()
if len(line) == 0 {
continue
}
var env claudeEnvelope
if err := json.Unmarshal(line, &env); err != nil {
out <- reviewtypes.RunError{Err: fmt.Errorf("claude stream-json: %w", err)}
continue
}
out <- reviewtypes.AssistantText{Text: line}
switch env.Type {
case envelopeTypeAssistant:
for _, block := range env.Message.Content {
switch block.Type {
case "text":
if block.Text != "" {
out <- reviewtypes.AssistantText{Text: block.Text}
}
case "tool_use":
// block.Input is a json.RawMessage; passing it through as a
// string preserves the agent-defined shape without a
// re-marshal round trip. Empty input becomes "" so consumers
// see a falsy Args.
out <- reviewtypes.ToolCall{Name: block.Name, Args: string(block.Input)}
}
}
case "result":
sawResult = true
resultErr = env.IsError
resultUsage = env.Usage
}
}
if err := scanner.Err(); err != nil {
out <- reviewtypes.RunError{Err: fmt.Errorf("read stdout: %w", err)}
out <- reviewtypes.Finished{Success: false}
return
}
out <- reviewtypes.Finished{Success: true}
if sawResult {
in := resultUsage.InputTokens + resultUsage.CacheReadInputTokens + resultUsage.CacheCreationInputTokens
out <- reviewtypes.Tokens{In: in, Out: resultUsage.OutputTokens}
out <- reviewtypes.Finished{Success: !resultErr}
return
}
out <- reviewtypes.Finished{Success: false}
}()
return out
}

type claudeEnvelope struct {
Type string `json:"type"`
Message claudeMessage `json:"message"`
IsError bool `json:"is_error"`
// Usage reuses the package-local messageUsage type (declared in types.go)
// rather than a duplicate ad-hoc struct, so the two consumers of the
// Claude API usage shape (transcript parsing + stream-json review parser)
// can't drift apart.
Usage messageUsage `json:"usage"`
}

type claudeMessage struct {
Content []claudeBlock `json:"content"`
}

type claudeBlock struct {
Type string `json:"type"`
Text string `json:"text"`
Name string `json:"name"`
Input json.RawMessage `json:"input"`
}
194 changes: 161 additions & 33 deletions cmd/entire/cli/agent/claudecode/reviewer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os/exec"
"strings"
"testing"
"time"

"github.com/entireio/cli/cmd/entire/cli/review"
reviewtypes "github.com/entireio/cli/cmd/entire/cli/review/types"
Expand Down Expand Up @@ -81,9 +82,10 @@ func TestReviewer_ArgvShape(t *testing.T) {
}
cmd := buildReviewCmd(context.Background(), cfg)

// Expect: claude -p <prompt>
if len(cmd.Args) < 3 {
t.Fatalf("expected at least 3 args, got %d: %v", len(cmd.Args), cmd.Args)
// Expect: claude -p <prompt> --output-format stream-json --verbose
wantSuffix := []string{"--output-format", "stream-json", "--verbose"}
if len(cmd.Args) != 3+len(wantSuffix) {
t.Fatalf("expected %d args, got %d: %v", 3+len(wantSuffix), len(cmd.Args), cmd.Args)
}
if cmd.Args[0] != "claude" {
t.Errorf("Args[0] = %q, want %q", cmd.Args[0], "claude")
Expand All @@ -95,6 +97,12 @@ func TestReviewer_ArgvShape(t *testing.T) {
if cmd.Args[2] == "" {
t.Error("Args[2] (prompt) is empty")
}
for i, want := range wantSuffix {
got := cmd.Args[3+i]
if got != want {
t.Errorf("Args[%d] = %q, want %q", 3+i, got, want)
}
}
for _, arg := range cmd.Args {
if arg == "--continue" || arg == "-c" || arg == "--resume" || arg == "-r" {
t.Fatalf("Args must start a fresh Claude review, got resume/continue flag in %v", cmd.Args)
Expand Down Expand Up @@ -182,56 +190,176 @@ func TestParseClaudeOutput_ReportsScannerError(t *testing.T) {
}
}

func TestReviewer_EventStream(t *testing.T) {
func TestParseClaudeOutput_DecodesStreamJSON(t *testing.T) {
t.Parallel()

data, err := os.ReadFile("testdata/canned_session.txt")
data, err := os.ReadFile("testdata/stream_session.jsonl")
if err != nil {
t.Fatalf("read fixture: %v", err)
}

events := collectEvents(parseClaudeOutput(strings.NewReader(string(data))))

if len(events) < 3 {
t.Fatalf("expected at least 3 events (Started + at least one AssistantText + Finished), got %d", len(events))
}

// First event must be Started.
if _, ok := events[0].(reviewtypes.Started); !ok {
t.Errorf("events[0] = %T, want Started", events[0])
}

// Last event must be Finished{Success: true}.
last := events[len(events)-1]
fin, ok := last.(reviewtypes.Finished)
if !ok {
t.Errorf("last event = %T, want Finished", last)
} else if !fin.Success {
t.Errorf("Finished.Success = false, want true")
if !ok || !fin.Success {
t.Errorf("last event = %v, want Finished{Success:true}", last)
}

// All middle events must be AssistantText (no empty lines emitted).
for i := 1; i < len(events)-1; i++ {
at, ok := events[i].(reviewtypes.AssistantText)
if !ok {
t.Errorf("events[%d] = %T, want AssistantText", i, events[i])
continue
var sawText bool
for _, ev := range events {
if at, ok := ev.(reviewtypes.AssistantText); ok && strings.Contains(at.Text, "Cats are") {
sawText = true
}
}
if !sawText {
t.Error("expected AssistantText carrying fixture prose 'Cats are…'")
}

var tokensSeen int
var tokensOut int
for _, ev := range events {
if tk, ok := ev.(reviewtypes.Tokens); ok {
tokensSeen++
tokensOut = tk.Out
}
if at.Text == "" {
t.Errorf("events[%d].Text is empty (empty lines must be skipped)", i)
}
if tokensSeen != 1 {
t.Errorf("Tokens count = %d, want 1", tokensSeen)
}
if tokensOut == 0 {
t.Error("Tokens.Out = 0, want > 0")
}
}
Comment thread
peyton-alt marked this conversation as resolved.

func TestParseClaudeOutput_StreamsEventsBeforeEOF(t *testing.T) {
t.Parallel()
pr, pw := io.Pipe()
events := parseClaudeOutput(pr)

expect := func(t *testing.T, want string) reviewtypes.Event {
t.Helper()
select {
case ev, ok := <-events:
if !ok {
t.Fatalf("event channel closed waiting for %s", want)
}
return ev
case <-time.After(2 * time.Second):
t.Fatalf("timed out waiting for %s — parser did not stream before EOF", want)
return nil
}
}

// Verify fixture content appears somewhere in the text events.
var combined strings.Builder
// First emitted event is always Started — before we even write anything.
if _, ok := expect(t, "Started").(reviewtypes.Started); !ok {
t.Fatal("first event must be Started")
}

// Write a system/init envelope — swallowed, no event read here.
if _, err := pw.Write([]byte(`{"type":"system","subtype":"init","session_id":"sid"}` + "\n")); err != nil {
t.Fatalf("pipe write: %v", err)
}

// Write an assistant text envelope — expect AssistantText before EOF.
if _, err := pw.Write([]byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"hello"}]}}` + "\n")); err != nil {
t.Fatalf("pipe write: %v", err)
}
ev := expect(t, "AssistantText")
at, ok := ev.(reviewtypes.AssistantText)
if !ok {
t.Fatalf("event = %T (%+v), want AssistantText", ev, ev)
}
if at.Text != "hello" {
t.Errorf("AssistantText.Text = %q, want %q", at.Text, "hello")
}

// Write an assistant tool_use envelope — expect ToolCall before EOF.
// This also covers the unexercised tool_use branch that was flagged in
// the PR's fixture-coverage review.
if _, err := pw.Write([]byte(`{"type":"assistant","message":{"content":[{"type":"tool_use","id":"tu_1","name":"Read","input":{"file_path":"x"}}]}}` + "\n")); err != nil {
t.Fatalf("pipe write: %v", err)
}
ev = expect(t, "ToolCall")
tc, ok := ev.(reviewtypes.ToolCall)
if !ok {
t.Fatalf("event = %T (%+v), want ToolCall", ev, ev)
}
if tc.Name != "Read" {
t.Errorf("ToolCall.Name = %q, want %q", tc.Name, "Read")
}
if !strings.Contains(tc.Args, `"file_path":"x"`) {
t.Errorf("ToolCall.Args = %q, want to contain file_path:x", tc.Args)
}

// Write the result envelope and close — expect Tokens then Finished.
if _, err := pw.Write([]byte(`{"type":"result","subtype":"success","is_error":false,"usage":{"input_tokens":100,"output_tokens":42,"cache_read_input_tokens":0,"cache_creation_input_tokens":0}}` + "\n")); err != nil {
t.Fatalf("pipe write: %v", err)
}
_ = pw.Close()

ev = expect(t, "Tokens")
tk, ok := ev.(reviewtypes.Tokens)
if !ok {
t.Fatalf("event = %T (%+v), want Tokens", ev, ev)
}
if tk.Out != 42 || tk.In != 100 {
t.Errorf("Tokens = %+v, want {In:100, Out:42}", tk)
}
ev = expect(t, "Finished")
fin, ok := ev.(reviewtypes.Finished)
if !ok {
t.Fatalf("event = %T (%+v), want Finished", ev, ev)
}
if !fin.Success {
t.Error("Finished.Success = false, want true")
}
}

func TestParseClaudeOutput_NoResultEnvelopeMeansFailed(t *testing.T) {
t.Parallel()
// A truncated session: assistant message but no `result` envelope.
// The parser must surface this as Finished{Success: false} so the
// caller distinguishes "agent exited mid-generation" from "agent
// completed successfully".
input := `{"type":"assistant","message":{"content":[{"type":"text","text":"partial"}]}}` + "\n"
events := collectEvents(parseClaudeOutput(strings.NewReader(input)))

last := events[len(events)-1]
fin, ok := last.(reviewtypes.Finished)
if !ok {
t.Fatalf("last event = %T, want Finished", last)
}
if fin.Success {
t.Error("Finished.Success = true, want false on missing result envelope")
}
}

func TestParseClaudeOutput_GarbledLineEmitsRunErrorAndContinues(t *testing.T) {
t.Parallel()
// A garbled non-JSON line between valid envelopes must not abort the
// parser. The bad line surfaces as RunError; the stream continues to
// consume subsequent envelopes including a clean result.
input := `{"type":"assistant","message":{"content":[{"type":"text","text":"ok"}]}}` + "\n" +
"this is not json" + "\n" +
`{"type":"result","subtype":"success","is_error":false,"usage":{"output_tokens":1}}` + "\n"
events := collectEvents(parseClaudeOutput(strings.NewReader(input)))

var sawRunError, sawSuccess bool
for _, ev := range events {
if at, ok := ev.(reviewtypes.AssistantText); ok {
combined.WriteString(at.Text)
combined.WriteString("\n")
if _, ok := ev.(reviewtypes.RunError); ok {
sawRunError = true
}
if fin, ok := ev.(reviewtypes.Finished); ok && fin.Success {
sawSuccess = true
}
}
if !sawRunError {
t.Error("expected RunError for garbled line")
}
if !strings.Contains(combined.String(), "AgentReviewer") {
t.Error("expected fixture content mentioning 'AgentReviewer' to appear in AssistantText events")
if !sawSuccess {
t.Error("expected Finished{Success:true} after recovering from garbled line")
}
}

Expand Down
Loading
Loading