entireio · peyton-alt · May 13, 2026 · May 14, 2026
@@ -10,7 +10,7 @@ Codex (OpenAI's CLI coding agent) supports lifecycle hooks via `hooks.json` conf
 |-------|--------|-------|
 | Binary present | PASS | `codex` found on PATH |
 | Help available | PASS | `codex --help` shows full subcommand list |
-| Version info | PASS | `codex-cli 0.116.0` |
+| Version info | PASS | `codex-cli 0.130.0` |
 | Hook keywords | PASS | Hook system via `hooks.json` config files |
 | Session keywords | PASS | `resume`, `fork` subcommands; session stored as threads in SQLite + JSONL rollout files |
 | Config directory | PASS | `~/.codex/` (overridable via `CODEX_HOME`) |
@@ -19,7 +19,7 @@ Codex (OpenAI's CLI coding agent) supports lifecycle hooks via `hooks.json` conf
 ## Binary
 
 - Name: `codex`
-- Version: `codex-cli 0.116.0`
+- Version: `codex-cli 0.130.0`
 - Install: `npm install -g @openai/codex` or build from source
 
 ## Hook Mechanism
@@ -191,6 +191,76 @@ The `systemMessage` field can be used to display messages to the user via the ag
 - JSONL output: `codex exec --json "<prompt>"` (events to stdout)
 - Relevant env vars: `CODEX_HOME` (config dir override), `OPENAI_API_KEY` (API auth)
 
+## Plugin / Skill Invocation
+
+Codex's invocation syntax differs from Claude Code's `/<plugin>:<command>`
+form. Three prefixes are used:
+
+| Prefix | Meaning | Example |
+|--------|---------|---------|
+| `/`    | Codex built-in slash-command (reserved; not extensible by user plugins) | `/review`, `/plugins` (non-exhaustive — see `codex-rs/tui/src/slash_command.rs` for the full set) |
+| `@`    | User-installed plugin | `@codex-review-pack` |
+| `$`    | Bundled skill within a plugin | `$thorough-review` |
+
+Plugin install surface: `codex plugin marketplace add <url>`, then `codex
+plugin marketplace upgrade` / `remove`. (Earlier docs may reference `codex
+plugins add` — that subcommand does not exist.)
+
+Skill discovery for codex is currently stubbed in
+`cmd/entire/cli/agent/codex/discovery.go`. When implemented, discovered
+skills must be returned with their actual codex invocation prefix
+(`@plugin-name` or `$skill-name`), never claude's `/<plugin>:<command>`
+form. See the curated install hints in
+`cmd/entire/cli/agent/skilldiscovery/registry.go` for the existing
+per-agent registry pattern.
+
+## Review Performance
+
+Codex review wall-clock varies significantly on identical input (we
+observed a 3x spread across sequential runs with the same prompt, scope,
+and config). The dominant driver is **codex's reasoning model choosing
+how broadly to explore** per turn — not network, caching, or entire's
+wrapper.
+
+The biggest controllable lever is `model_reasoning_effort`. Approximate
+impact:
+
+| `reasoning_effort` | Behavior |
+|---|---|
+| `xhigh` | Thorough; expect 4-6 min on a small diff |
+| `medium` / `low` | Faster; 1-2 min typical, but variance remains |
+
+### Tuning per-spawn (overrides `~/.codex/config.toml`)
+
+`entire review --agent codex` honors per-agent overrides from the
+`review.codex.*` section in `.entire/settings.json` (or clone-local
+preferences at `.git/entire/preferences.json`). Both keys are optional:
+
+```json
+{
+  "review": {
+    "codex": {
+      "skills": ["/review"],
+      "reasoning_effort": "low",
+      "model": "gpt-5-mini"
+    }
+  }
+}
+```
+
+- `reasoning_effort` → `-c model_reasoning_effort=<level>` flag
+- `model` → `-m <model>` flag
+
+Empty values fall back to whatever `~/.codex/config.toml` configures, so
+the global codex config remains the default. Users who want fast codex
+reviews while keeping `xhigh` globally should set `reasoning_effort` to
+`low` or `medium` here.
+
+Codex session transcripts (JSONL rollouts at
+`~/.codex/sessions/YYYY/MM/DD/`) record every tool call codex made and
+the full token breakdown, which is the right place to start if a review
+ran much longer than usual.
+
 ## Gaps & Limitations
 
 - **Hooks require feature flag:** The `codex_hooks` feature is `default_enabled: false` (stage: UnderDevelopment). It must be enabled via `--enable codex_hooks` CLI flag, or `features.codex_hooks = true` in `config.toml`, or `-c features.codex_hooks=true`. Without this, hooks.json is ignored entirely.

@@ -22,6 +22,29 @@ import (
 // Prompt is piped via stdin (the trailing "-" tells codex to read from stdin).
 // Stdout is newline-delimited JSON envelopes (one event per line); no chrome
 // filter needed — each line is parsed directly into an Event.
+//
+// The composed prompt (skills + always-prompt + per-run prompt + scope clause
+// + checkpoint context) is passed verbatim. The `/review` skill name appears
+// as a literal slash-token at the top of the prompt — codex recognises
+// `/review` as one of its built-in slash-commands (see AGENT.md "Plugin /
+// Skill Invocation") and routes through its native review workflow, which
+// in turn references the user's installed code-reviewer skill if one
+// exists (e.g. `~/.codex/skills/code-reviewer/SKILL.md`).
+//
+// We deliberately do NOT paraphrase `/review` into 28 words of generic
+// instruction the way an older version did — that paraphrase obscured the
+// slash-command signal and was a contributor to the wall-clock gap with
+// claude.
+//
+// Note on the rejected alternative: codex's `codex exec review` subcommand
+// would invoke the native review workflow more directly, but it rejects
+// `[PROMPT]` whenever a scope flag (`--base` / `--uncommitted` / `--commit`)
+// is set, and codex hooks don't fire during non-interactive `codex exec`,
+// so there is no available channel to layer entire's user customization
+// (always-prompt, per-run prompt, scope clause, checkpoint context) onto a
+// native-subcommand run. Generic `codex exec` accepting full stdin is the
+// best mechanism today; if codex adds a `--system-prompt-file` (or fires
+// hooks during exec), this can be revisited.
 func NewReviewer() *reviewtypes.ReviewerTemplate {
 	return &reviewtypes.ReviewerTemplate{
 		AgentName: "codex",
@@ -32,37 +55,31 @@ func NewReviewer() *reviewtypes.ReviewerTemplate {
 
 // buildCodexReviewCmd builds the exec.Cmd for a codex review run.
 // Exposed at package level for test inspection of argv, stdin, and env.
+//
+// Per-spawn overrides from settings.ReviewConfig.{Model, ReasoningEffort}
+// translate to codex CLI flags `-m <model>` and `-c model_reasoning_effort=
+// <level>` respectively. Both are inserted before the trailing `-` stdin
+// marker. Empty values are skipped — codex falls back to whatever the
+// user's `~/.codex/config.toml` configures.
 func buildCodexReviewCmd(ctx context.Context, cfg reviewtypes.RunConfig) *exec.Cmd {
-	promptCfg := cfg
-	promptCfg.Skills = expandCodexBuiltinReview(cfg.Skills)
-	args := []string{codexExecCommand, "--skip-git-repo-check", "--json", "-"}
-	prompt := review.ComposeReviewPrompt(promptCfg)
+	args := []string{codexExecCommand, "--skip-git-repo-check", "--json"}
+	if cfg.Model != "" {
+		args = append(args, "-m", cfg.Model)
+	}
+	if cfg.ReasoningEffort != "" {
+		args = append(args, "-c", "model_reasoning_effort="+cfg.ReasoningEffort)
+	}
+	args = append(args, "-")
+
+	prompt := review.ComposeReviewPrompt(cfg)
 	cmd := exec.CommandContext(ctx, "codex", args...)
 	cmd.Stdin = strings.NewReader(prompt)
 	cmd.Env = review.AppendReviewEnv(os.Environ(), "codex", cfg, prompt)
 	return cmd
 }
 
-// Codex's native `exec review --base <branch>` rejects an additional prompt,
-// so expand `/review` into text and run normal `codex exec -`. That preserves
-// Entire's scoped base clause, per-run instructions, and checkpoint context.
-const codexBuiltinReviewPrompt = "Review the current branch changes and report actionable findings. " +
-	"Prioritize correctness, regressions, security, and missing test coverage. Do not make code changes."
-
 const codexExecCommand = "exec"
 
-func expandCodexBuiltinReview(skills []string) []string {
-	out := make([]string, 0, len(skills))
-	for _, skill := range skills {
-		if skill == "/review" {
-			out = append(out, codexBuiltinReviewPrompt)
-			continue
-		}
-		out = append(out, skill)
-	}
-	return out
-}
-
 // parseCodexOutput converts codex's `exec --json` stdout into a stream of
 // Events. Each stdout line is one JSON envelope (top-level "type" field).
 //

@@ -100,7 +100,84 @@ func TestCodexReviewer_ArgvShape(t *testing.T) {
 	}
 }
 
-func TestCodexReviewer_BuiltinReviewExpandsToScopedExecPrompt(t *testing.T) {
+// TestCodexReviewer_ModelAndReasoningEffortOverrides pins that per-spawn
+// `Model` and `ReasoningEffort` from RunConfig translate to the expected
+// codex CLI flags (`-m <model>` and `-c model_reasoning_effort=<level>`)
+// inserted before the trailing `-` stdin marker. Empty values must not
+// emit the flags at all.
+func TestCodexReviewer_ModelAndReasoningEffortOverrides(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name        string
+		cfg         reviewtypes.RunConfig
+		wantContain []string
+		wantOmit    []string
+	}{
+		{
+			name: "both empty: no overrides emitted",
+			cfg:  reviewtypes.RunConfig{Skills: []string{"/review"}},
+			wantOmit: []string{
+				"-m", "model_reasoning_effort=",
+			},
+		},
+		{
+			name: "model only",
+			cfg: reviewtypes.RunConfig{
+				Skills: []string{"/review"},
+				Model:  "gpt-5-mini",
+			},
+			wantContain: []string{"-m", "gpt-5-mini"},
+			wantOmit:    []string{"model_reasoning_effort="},
+		},
+		{
+			name: "reasoning_effort only",
+			cfg: reviewtypes.RunConfig{
+				Skills:          []string{"/review"},
+				ReasoningEffort: "low",
+			},
+			wantContain: []string{"-c", "model_reasoning_effort=low"},
+			wantOmit:    []string{"-m"},
+		},
+		{
+			name: "both: model first then reasoning",
+			cfg: reviewtypes.RunConfig{
+				Skills:          []string{"/review"},
+				Model:           "gpt-5-mini",
+				ReasoningEffort: "medium",
+			},
+			wantContain: []string{
+				"-m", "gpt-5-mini",
+				"-c", "model_reasoning_effort=medium",
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			cmd := buildCodexReviewCmd(context.Background(), tc.cfg)
+			argv := strings.Join(cmd.Args, " ")
+			for _, want := range tc.wantContain {
+				if !strings.Contains(argv, want) {
+					t.Errorf("argv missing %q: %s", want, argv)
+				}
+			}
+			for _, omit := range tc.wantOmit {
+				if strings.Contains(argv, omit) {
+					t.Errorf("argv must not contain %q: %s", omit, argv)
+				}
+			}
+			// Always-trailing `-` stdin marker must remain last.
+			if cmd.Args[len(cmd.Args)-1] != "-" {
+				t.Errorf("last argv must be '-' (stdin marker), got %q in %v",
+					cmd.Args[len(cmd.Args)-1], cmd.Args)
+			}
+		})
+	}
+}
+
+func TestCodexReviewer_BuiltinReviewPassesThroughInScopedExecPrompt(t *testing.T) {
 	t.Parallel()
 	cfg := reviewtypes.RunConfig{
 		Skills:            []string{"/review"},
@@ -121,18 +198,24 @@ func TestCodexReviewer_BuiltinReviewExpandsToScopedExecPrompt(t *testing.T) {
 	}
 
 	prompt := readCodexCmdStdin(t, cmd)
-	if strings.Contains(prompt, "/review") {
-		t.Fatalf("builtin review prompt should not include raw /review:\n%s", prompt)
+	// /review now passes through to codex verbatim — codex's runtime
+	// auto-loads any installed code-reviewer skill (~/.codex/skills/...)
+	// when it sees the slash token in prompt text.
+	if !strings.Contains(prompt, "/review") {
+		t.Fatalf("composed prompt must contain literal /review token:\n%s", prompt)
+	}
+	// The legacy 28-word paraphrase MUST NOT appear — pinning that regression.
+	if strings.Contains(prompt, "Review the current branch changes and report actionable findings.") {
+		t.Fatalf("composed prompt must not contain the legacy paraphrase:\n%s", prompt)
 	}
 	for _, wantText := range []string{
-		"Review the current branch changes and report actionable findings.",
 		"Focus on auth regressions.",
 		"Scope: review the commits unique to this branch vs main, plus any uncommitted changes in the working tree. Ignore code outside this scope.",
 		"Commits in scope (newest first):",
 		"abc123 summary",
 	} {
 		if !strings.Contains(prompt, wantText) {
-			t.Fatalf("builtin review prompt missing %q:\n%s", wantText, prompt)
+			t.Fatalf("composed prompt missing %q:\n%s", wantText, prompt)
 		}
 	}
 }

@@ -59,8 +59,16 @@ var installHints = map[string][]InstallHint{
 	},
 	"codex": {
 		{
-			Message:     "Install `codex-review-pack` via `codex plugins add <url>`",
-			ProvidesAny: []string{"/codex:adversarial-review"},
+			// Placeholder name until a canonical codex review plugin is pinned
+			// (matches the existing pattern for claude-code / gemini entries
+			// — see comment at the top of installHints). ProvidesAny must use
+			// codex's actual invocation syntax: `@plugin-name` for installed
+			// plugins, `$skill-name` for bundled skills. Slash-commands like
+			// `/review` are reserved for codex built-ins and don't extend to
+			// plugins (claude's `/<plugin>:<command>` form is the misconception
+			// this entry was previously embodying).
+			Message:     "Install `codex-review-pack` via `codex plugin marketplace add <url>`",
+			ProvidesAny: []string{"@codex-review-pack"},
 		},
 	},
 	"gemini": {

@@ -1,11 +1,41 @@
 package skilldiscovery_test
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/entireio/cli/cmd/entire/cli/agent/skilldiscovery"
 )
 
+// TestInstallHintsFor_CodexUsesCodexInvocationSyntax pins the contract that
+// codex install hints use codex's actual invocation syntax (@plugin-name or
+// $skill-name), not claude's /<plugin>:<command> form. The codex picker can
+// never discover a `/plugin:cmd` entry, so a hint with that shape in
+// ProvidesAny is permanently unsuppressable.
+func TestInstallHintsFor_CodexUsesCodexInvocationSyntax(t *testing.T) {
+	t.Parallel()
+	hints := skilldiscovery.ActiveInstallHintsFor("codex", nil)
+	if len(hints) == 0 {
+		// It's valid to have no hints (we may drop them entirely in the
+		// future). If the entry exists, it must use codex syntax.
+		return
+	}
+	for _, h := range hints {
+		for _, providesAny := range h.ProvidesAny {
+			// Reject claude-plugin syntax (`/<plugin>:<command>`) specifically.
+			// Bare `/<name>` (e.g. `/review`) is a legitimate codex built-in
+			// slash-command, so we don't reject it — only the colon-namespaced
+			// form is the bug we're guarding against.
+			if strings.HasPrefix(providesAny, "/") && strings.Contains(providesAny, ":") {
+				t.Errorf("codex install hint ProvidesAny %q uses claude-plugin syntax (/plugin:command); codex plugins are invoked as @plugin-name or $skill-name", providesAny)
+			}
+		}
+		if strings.Contains(h.Message, "codex plugins add") {
+			t.Errorf("codex install hint Message references `codex plugins add` (not a real codex subcommand); use `codex plugin marketplace add <url>` instead. Message: %q", h.Message)
+		}
+	}
+}
+
 func TestCuratedBuiltinsFor_KnownAgents(t *testing.T) {
 	t.Parallel()
 	claude := skilldiscovery.CuratedBuiltinsFor("claude-code")

@@ -795,6 +795,8 @@ func runConfigWithReviewConfig(base reviewtypes.RunConfig, cfg settings.ReviewCo
 
 func applyReviewConfig(runCfg *reviewtypes.RunConfig, cfg settings.ReviewConfig) {
 	runCfg.Skills = cfg.Skills
+	runCfg.Model = cfg.Model
+	runCfg.ReasoningEffort = cfg.ReasoningEffort
 	if len(cfg.Skills) == 0 {
 		runCfg.PromptOverride = cfg.Prompt
 		return

@@ -104,6 +104,18 @@ type RunConfig struct {
 	// the commit that was reviewed.
 	StartingSHA string
 
+	// Model is an optional per-spawn model override the reviewer should
+	// pass to its underlying CLI (e.g. codex `-m <model>`). Empty means
+	// use the agent's default. Reviewers that don't support a model flag
+	// silently ignore this field.
+	Model string
+
+	// ReasoningEffort is an optional per-spawn reasoning-effort override
+	// (e.g. "low", "medium", "high", "xhigh" for codex). Empty means use
+	// the agent's default. Reviewers that don't expose reasoning effort
+	// silently ignore this field.
+	ReasoningEffort string
+
 	// EnrichSummary optionally updates the completed run summary before sinks
 	// receive RunFinished. It is used for post-process data such as token
 	// totals that are only available after agent lifecycle hooks flush state.