Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions plugins/codex/agents/codex-rescue.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Forwarding rules:
- Leave model unset by default. Only add `--model` when the user explicitly asks for a specific model.
- If the user asks for `spark`, map that to `--model gpt-5.3-codex-spark`.
- If the user asks for a concrete model name such as `gpt-5.4-mini`, pass it through with `--model`.
- Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias. Do not substitute, shorten, normalize, or guess model names. Never emit a model name the user did not literally type or whose alias they did not literally type.
- Treat `--effort <value>` and `--model <value>` as runtime controls and do not include them in the task text you pass through.
- Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
- Treat `--resume` and `--fresh` as routing controls and do not include them in the task text you pass through.
Expand All @@ -38,8 +39,10 @@ Forwarding rules:
- If the user is clearly asking to continue prior Codex work in this repository, such as "continue", "keep going", "resume", "apply the top fix", or "dig deeper", add `--resume-last` unless `--fresh` is present.
- Otherwise forward the task as a fresh `task` run.
- Preserve the user's task text as-is apart from stripping routing flags.
- Return the stdout of the `codex-companion` command exactly as-is.
- If the Bash call fails or Codex cannot be invoked, return nothing.
- Pass the forwarded task text on stdin with `task --stdin`, not as a shell-quoted CLI argument.
- Do not inline multi-line prompts as a quoted argument or as `"$(cat ...)"`; that re-expands prompt content through shell quoting.
- Return the stdout of the `codex-companion` command exactly as-is when it succeeds.
- If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop. Do not retry, do not make another `task` invocation, and do not return an empty response.

Response style:

Expand Down
2 changes: 2 additions & 0 deletions plugins/codex/commands/rescue.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task-resume-candidate -
Operating rules:

- The subagent is a thin forwarder only. It should use one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...` and return that command's stdout as-is.
- The subagent must pass forwarded task text on stdin with `task --stdin`, not as a shell-quoted prompt argument.
- If the subagent's Bash call fails, it must return the failure output once and stop rather than retrying or returning an empty response.
- Return the Codex companion stdout verbatim to the user.
- Do not paraphrase, summarize, rewrite, or add commentary before or after it.
- Do not ask the subagent to inspect files, monitor progress, poll `/codex:status`, fetch `/codex:result`, call `/codex:cancel`, summarize output, or do follow-up work of its own.
Expand Down
24 changes: 19 additions & 5 deletions plugins/codex/scripts/codex-companion.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ function printUsage() {
" node scripts/codex-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]",
" node scripts/codex-companion.mjs review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>]",
" node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [focus text]",
" node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [prompt]",
" node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [--stdin|--prompt-file <path>|prompt]",
" node scripts/codex-companion.mjs status [job-id] [--all] [--json]",
" node scripts/codex-companion.mjs result [job-id] [--json]",
" node scripts/codex-companion.mjs cancel [job-id] [--json]"
Expand Down Expand Up @@ -610,7 +610,20 @@ function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId
};
}

function readTaskPrompt(cwd, options, positionals) {
function readTaskPrompt(cwd, options, positionals, context = {}) {
if (options.stdin && options["prompt-file"]) {
throw new Error("Choose either --stdin or --prompt-file.");
}
if (options.stdin && positionals.length > 0) {
throw new Error("Do not pass positional prompt text when using --stdin.");
}
if (options.stdin) {
const prompt = readStdinIfPiped();
if (!prompt && !context.resumeLast) {
throw new Error("Expected prompt text on stdin.");
}
return prompt;
}
if (options["prompt-file"]) {
return fs.readFileSync(path.resolve(cwd, options["prompt-file"]), "utf8");
}
Expand Down Expand Up @@ -732,7 +745,7 @@ async function handleReview(argv) {
async function handleTask(argv) {
const { options, positionals } = parseCommandInput(argv, {
valueOptions: ["model", "effort", "cwd", "prompt-file"],
booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background"],
booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background", "stdin"],
aliasMap: {
m: "model"
}
Expand All @@ -742,13 +755,14 @@ async function handleTask(argv) {
const workspaceRoot = resolveCommandWorkspace(options);
const model = normalizeRequestedModel(options.model);
const effort = normalizeReasoningEffort(options.effort);
const prompt = readTaskPrompt(cwd, options, positionals);

const resumeLast = Boolean(options["resume-last"] || options.resume);
const fresh = Boolean(options.fresh);
if (resumeLast && fresh) {
throw new Error("Choose either --resume/--resume-last or --fresh.");
}
const prompt = readTaskPrompt(cwd, options, positionals, {
resumeLast
});
const write = Boolean(options.write);
const taskMetadata = buildTaskRunMetadata({
prompt,
Expand Down
3 changes: 1 addition & 2 deletions plugins/codex/scripts/lib/codex.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ function buildThreadParams(cwd, options = {}) {
approvalPolicy: options.approvalPolicy ?? "never",
sandbox: options.sandbox ?? "read-only",
serviceName: SERVICE_NAME,
ephemeral: options.ephemeral ?? true,
experimentalRawEvents: false
ephemeral: options.ephemeral ?? true
};
}

Expand Down
7 changes: 5 additions & 2 deletions plugins/codex/skills/codex-cli-runtime/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ user-invocable: false
Use this skill only inside the `codex:codex-rescue` subagent.

Primary helper:
- `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task "<raw arguments>"`
- `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task --stdin ...`

Execution rules:
- The rescue subagent is a forwarder, not an orchestrator. Its only job is to invoke `task` once and return that stdout unchanged.
Expand All @@ -21,6 +21,7 @@ Execution rules:
- Leave `--effort` unset unless the user explicitly requests a specific effort.
- Leave model unset by default. Add `--model` only when the user explicitly asks for one.
- Map `spark` to `--model gpt-5.3-codex-spark`.
- Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias. Do not substitute, shorten, normalize, or guess model names.
- Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.

Command selection:
Expand All @@ -34,10 +35,12 @@ Command selection:
- `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up.
- `--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.
- `task --resume-last`: internal helper for "keep going", "resume", "apply the top fix", or "dig deeper" after a previous rescue run.
- Pass the forwarded task text on stdin with `task --stdin`.
- Do not pass raw task text as a shell-quoted CLI argument, and do not use `"$(cat ...)"` to inline file contents.

Safety rules:
- Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior.
- Preserve the user's task text as-is apart from stripping routing flags.
- Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own.
- Return the stdout of the `task` command exactly as-is.
- If the Bash call fails or Codex cannot be invoked, return nothing.
- If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop. Do not retry, do not make another `task` invocation, and do not return an empty response.
19 changes: 16 additions & 3 deletions tests/commands.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -135,23 +135,36 @@ test("rescue command absorbs continue semantics", () => {
assert.match(agent, /Leave model unset by default/i);
assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i);
assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i);
assert.match(agent, /Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias/i);
assert.match(agent, /Never emit a model name the user did not literally type or whose alias they did not literally type/i);
assert.match(agent, /Pass the forwarded task text on stdin with `task --stdin`, not as a shell-quoted CLI argument/i);
assert.doesNotMatch(agent, /mktemp|chmod 600|trap/);
assert.match(agent, /Do not inline multi-line prompts as a quoted argument or as `"\$\(cat \.\.\.\)"`/i);
assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i);
assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop/i);
assert.match(agent, /Do not retry, do not make another `task` invocation, and do not return an empty response/i);
assert.doesNotMatch(agent, /return nothing/i);
assert.match(agent, /gpt-5-4-prompting/);
assert.match(agent, /only to tighten the user's request into a better Codex prompt/i);
assert.match(agent, /Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work/i);
assert.match(runtimeSkill, /codex-companion\.mjs" task --stdin \.\.\./);
assert.match(runtimeSkill, /only job is to invoke `task` once and return that stdout unchanged/i);
assert.match(runtimeSkill, /Do not call `setup`, `review`, `adversarial-review`, `status`, `result`, or `cancel`/i);
assert.match(runtimeSkill, /use the `gpt-5-4-prompting` skill to rewrite the user's request into a tighter Codex prompt/i);
assert.match(runtimeSkill, /That prompt drafting is the only Claude-side work allowed/i);
assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i);
assert.match(runtimeSkill, /Leave model unset by default/i);
assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i);
assert.match(runtimeSkill, /Copy the user's requested model string into `--model` character-for-character, except for the documented `spark` alias/i);
assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i);
assert.match(runtimeSkill, /Strip it before calling `task`/i);
assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i);
assert.match(runtimeSkill, /Pass the forwarded task text on stdin with `task --stdin`/i);
assert.match(runtimeSkill, /Do not pass raw task text as a shell-quoted CLI argument/i);
assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i);
assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return the failure output exactly once and stop/i);
assert.match(runtimeSkill, /Do not retry, do not make another `task` invocation, and do not return an empty response/i);
assert.doesNotMatch(runtimeSkill, /return nothing/i);
assert.match(readme, /`codex:codex-rescue` subagent/i);
assert.match(readme, /if you do not pass `--model` or `--effort`, Codex chooses its own defaults/i);
assert.match(readme, /--model gpt-5\.4-mini --effort medium/i);
Expand Down Expand Up @@ -186,7 +199,7 @@ test("internal docs use task terminology for rescue runs", () => {
const promptingSkill = read("skills/gpt-5-4-prompting/SKILL.md");
const promptRecipes = read("skills/gpt-5-4-prompting/references/codex-prompt-recipes.md");

assert.match(runtimeSkill, /codex-companion\.mjs" task "<raw arguments>"/);
assert.match(runtimeSkill, /codex-companion\.mjs" task --stdin \.\.\./);
assert.match(runtimeSkill, /Use `task` for every rescue request/i);
assert.match(runtimeSkill, /task --resume-last/i);
assert.match(promptingSkill, /Use `task` when the task is diagnosis/i);
Expand Down
110 changes: 110 additions & 0 deletions tests/runtime.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,116 @@ test("task --fresh is treated as routing control and does not leak into the prom
assert.equal(fakeState.lastTurnStart.prompt, "diagnose the flaky test");
});

test("task --prompt-file preserves multiline shell-sensitive prompt text", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
const statePath = path.join(binDir, "fake-codex-state.json");
installFakeCodex(binDir);
initGitRepo(repo);
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
run("git", ["add", "README.md"], { cwd: repo });
run("git", ["commit", "-m", "init"], { cwd: repo });

const prompt = "diagnose quoting\nliteral $(echo should-not-run)\nquote ' and double \" and backslash \\";
const promptFile = path.join(repo, "prompt.txt");
fs.writeFileSync(promptFile, prompt, "utf8");

const result = run("node", [SCRIPT, "task", "--prompt-file", promptFile], {
cwd: repo,
env: buildEnv(binDir)
});

assert.equal(result.status, 0, result.stderr);
const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8"));
assert.equal(fakeState.lastTurnStart.prompt, prompt);
});

test("task --stdin preserves multiline shell-sensitive prompt text", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
const statePath = path.join(binDir, "fake-codex-state.json");
installFakeCodex(binDir);
initGitRepo(repo);
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
run("git", ["add", "README.md"], { cwd: repo });
run("git", ["commit", "-m", "init"], { cwd: repo });

const prompt = "diagnose quoting\nliteral $(echo should-not-run)\nquote ' and double \" and backslash \\";
const result = run("node", [SCRIPT, "task", "--stdin"], {
cwd: repo,
env: buildEnv(binDir),
input: prompt
});

assert.equal(result.status, 0, result.stderr);
const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8"));
assert.equal(fakeState.lastTurnStart.prompt, prompt);
});

test("task --stdin rejects mixed prompt sources", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
installFakeCodex(binDir);
initGitRepo(repo);
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
run("git", ["add", "README.md"], { cwd: repo });
run("git", ["commit", "-m", "init"], { cwd: repo });

const withPositional = run("node", [SCRIPT, "task", "--stdin", "diagnose"], {
cwd: repo,
env: buildEnv(binDir),
input: "from stdin"
});
assert.notEqual(withPositional.status, 0);
assert.match(withPositional.stderr, /Do not pass positional prompt text when using --stdin/);

const promptFile = path.join(repo, "prompt.txt");
fs.writeFileSync(promptFile, "from file", "utf8");
const withPromptFile = run("node", [SCRIPT, "task", "--stdin", "--prompt-file", promptFile], {
cwd: repo,
env: buildEnv(binDir),
input: "from stdin"
});
assert.notEqual(withPromptFile.status, 0);
assert.match(withPromptFile.stderr, /Choose either --stdin or --prompt-file/);
});

test("task --stdin allows empty prompt only when resuming", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
const statePath = path.join(binDir, "fake-codex-state.json");
installFakeCodex(binDir);
initGitRepo(repo);
fs.writeFileSync(path.join(repo, "README.md"), "hello\n");
run("git", ["add", "README.md"], { cwd: repo });
run("git", ["commit", "-m", "init"], { cwd: repo });

const firstRun = run("node", [SCRIPT, "task", "initial task"], {
cwd: repo,
env: buildEnv(binDir)
});
assert.equal(firstRun.status, 0, firstRun.stderr);

const resume = run("node", [SCRIPT, "task", "--stdin", "--resume-last"], {
cwd: repo,
env: buildEnv(binDir),
input: ""
});

assert.equal(resume.status, 0, resume.stderr);
const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8"));
assert.equal(fakeState.lastTurnStart.threadId, "thr_1");
assert.equal(fakeState.lastTurnStart.prompt, "Continue from the current thread state. Pick the next highest-value step and follow through until the task is resolved.");

const fresh = run("node", [SCRIPT, "task", "--stdin"], {
cwd: repo,
env: buildEnv(binDir),
input: ""
});
assert.notEqual(fresh.status, 0);
assert.match(fresh.stderr, /Expected prompt text on stdin/);
});

test("task forwards model selection and reasoning effort to app-server turn/start", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
Expand Down