feat: automatic back-off + prompt/token optimizer (#691) #703

Status: Closed · wants to merge 3 commits
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
@@ -42,12 +42,19 @@ jobs:
- name: Install dependencies
run: pnpm install

- name: Fix code formatting
working-directory: codex-cli
run: pnpm run format:fix

# Run all tasks using workspace filters

- name: Check TypeScript code formatting
working-directory: codex-cli
run: pnpm run format

- name: Fix Markdown and config file formatting
run: pnpm run format:fix

- name: Check Markdown and config file formatting
run: pnpm run format

10 changes: 10 additions & 0 deletions README.md
@@ -222,6 +222,16 @@ The hardening mechanism Codex uses depends on your OS:

Key flags: `--model/-m`, `--approval-mode/-a`, `--quiet/-q`, and `--notify`.

**Retry & Rate-Limit Options**

- `--max-retries <number>` Maximum number of retry attempts for rate-limit and transient errors (default: 5)
- `--base-delay-ms <ms>` Base back-off delay in milliseconds (default: 2500)
- `--max-delay-ms <ms>` Maximum back-off delay in milliseconds (default: 60000)

**Token Usage Reporting**

- `--token-report` Print a token usage summary after each response

---

## Memory & Project Docs
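As a reading aid, here is the delay schedule the documented defaults imply, assuming plain exponential doubling capped at `--max-delay-ms` (a hand-written sketch, not code from this PR):

```ts
// Delays implied by the defaults: --base-delay-ms 2500, --max-delay-ms 60000,
// --max-retries 5 (so at most four waits between the five attempts).
const baseDelayMs = 2500;
const maxDelayMs = 60_000;
const maxRetries = 5;

for (let attempt = 1; attempt < maxRetries; attempt++) {
  const delayMs = Math.min(baseDelayMs * 2 ** (attempt - 1), maxDelayMs);
  console.log(`wait after attempt ${attempt}: ${delayMs} ms`);
}
// wait after attempt 1: 2500 ms
// wait after attempt 2: 5000 ms
// wait after attempt 3: 10000 ms
// wait after attempt 4: 20000 ms
```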
27 changes: 27 additions & 0 deletions codex-cli/src/cli.tsx
@@ -165,6 +165,28 @@ const cli = meow(
type: "boolean",
description: "Enable desktop notifications for responses",
},
// Retry/back-off options
maxRetries: {
type: "number",
default: 5,
description:
"Max number of retry attempts for rate-limit and transient errors",
},
baseDelayMs: {
type: "number",
default: 2500,
description: "Base delay in milliseconds for exponential back-off",
},
maxDelayMs: {
type: "number",
default: 60000,
description: "Maximum back-off delay in milliseconds",
},
tokenReport: {
type: "boolean",
default: false,
description: "Print token usage report after each request",
},

disableResponseStorage: {
type: "boolean",
@@ -294,6 +316,11 @@ config = {
cli.flags.disableResponseStorage !== undefined
? Boolean(cli.flags.disableResponseStorage)
: config.disableResponseStorage,
// Retry/back-off and token-report settings
maxRetries: cli.flags.maxRetries,
baseDelayMs: cli.flags.baseDelayMs,
maxDelayMs: cli.flags.maxDelayMs,
tokenReport: Boolean(cli.flags.tokenReport),
};

// Check for updates after loading config. This is important because we write state file in
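To make the flag wiring above concrete: meow exposes kebab-case CLI flags as camelCase keys, so the new definitions surface on `cli.flags` as in this standalone sketch (assumes meow v9+ under ESM; not code from the PR):

```ts
// Sketch: how `codex --max-retries 3 --token-report` surfaces in cli.flags.
// Flag names and defaults mirror the PR; everything else is illustrative.
import meow from "meow";

const cli = meow("Usage: codex [options]", {
  importMeta: import.meta,
  flags: {
    maxRetries: { type: "number", default: 5 },
    baseDelayMs: { type: "number", default: 2500 },
    maxDelayMs: { type: "number", default: 60000 },
    tokenReport: { type: "boolean", default: false },
  },
});

// With `--max-retries 3 --token-report` on argv:
// cli.flags.maxRetries === 3, cli.flags.tokenReport === true,
// and the remaining flags fall back to their defaults.
console.log(cli.flags);
```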
42 changes: 42 additions & 0 deletions codex-cli/src/components/chat/terminal-chat.tsx
@@ -12,6 +12,7 @@ import { useConfirmation } from "../../hooks/use-confirmation.js";
import { useTerminalSize } from "../../hooks/use-terminal-size.js";
import { AgentLoop } from "../../utils/agent/agent-loop.js";
import { ReviewDecision } from "../../utils/agent/review.js";
import { approximateTokensUsed } from "../../utils/approximate-tokens-used.js";
import { generateCompactSummary } from "../../utils/compact-summary.js";
import { getBaseUrl, getApiKey, saveConfig } from "../../utils/config.js";
import { extractAppliedPatches as _extractAppliedPatches } from "../../utils/extract-applied-patches.js";
@@ -148,10 +149,51 @@ export default function TerminalChat({
const [lastResponseId, setLastResponseId] = useState<string | null>(null);
const [items, setItems] = useState<Array<ResponseItem>>([]);
const [loading, setLoading] = useState<boolean>(false);
// Token-report refs to track per-turn usage
const prevPromptTokRef = useRef<number>(0);
const prevCompletionTokRef = useRef<number>(0);
const [approvalPolicy, setApprovalPolicy] = useState<ApprovalPolicy>(
initialApprovalPolicy,
);
const [thinkingSeconds, setThinkingSeconds] = useState(0);
// Inject a token usage report after each completed response
useEffect(() => {
if (config.tokenReport && lastResponseId) {
// Filter chat message items with role 'user'
const userItems = items.filter(
(it): it is ResponseItem & { type: "message"; role: "user" } => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return it.type === "message" && (it as any).role === "user";
},
);
// Filter messages with role 'assistant'
const aiItems = items.filter(
(it): it is ResponseItem & { type: "message"; role: "assistant" } => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return it.type === "message" && (it as any).role === "assistant";
},
);
const promptTok = approximateTokensUsed(userItems);
const completionTok = approximateTokensUsed(aiItems);
const deltaPrompt = promptTok - prevPromptTokRef.current;
const deltaCompletion = completionTok - prevCompletionTokRef.current;
const deltaTotal = deltaPrompt + deltaCompletion;
prevPromptTokRef.current = promptTok;
prevCompletionTokRef.current = completionTok;
const text = `Prompt: ${deltaPrompt} tok Completion: ${deltaCompletion} tok Total: ${deltaTotal} tok`;
setItems((prev) => [
...prev,
{
id: `token-report-${Date.now()}`,
type: "message",
role: "system",
content: [{ type: "input_text", text }],
} as ResponseItem,
]);
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [lastResponseId]);

const handleCompact = async () => {
setLoading(true);
Expand Down
31 changes: 22 additions & 9 deletions codex-cli/src/utils/agent/agent-loop.ts
@@ -670,8 +670,8 @@
// Send request to OpenAI with retry on timeout.
let stream;

- // Retry loop for transient errors. Up to MAX_RETRIES attempts.
- const MAX_RETRIES = 5;
+ // Retry loop for transient errors. Up to maxRetries attempts.
+ const MAX_RETRIES = this.config.maxRetries ?? 5;
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
try {
let reasoning: Reasoning | undefined;
@@ -779,19 +779,25 @@
/rate limit/i.test(errCtx.message ?? "");
if (isRateLimit) {
if (attempt < MAX_RETRIES) {
- // Exponential backoff: base wait * 2^(attempt-1), or use suggested retry time
- // if provided.
- let delayMs = RATE_LIMIT_RETRY_WAIT_MS * 2 ** (attempt - 1);
+ // Exponential backoff: baseDelayMs * 2^(attempt-1), or use suggested retry time if provided.
+ const base = this.config.baseDelayMs ?? RATE_LIMIT_RETRY_WAIT_MS;
+ let delayMs = base * 2 ** (attempt - 1);
+ const maxDelay = this.config.maxDelayMs ?? delayMs;

// Parse suggested retry time from error message, e.g., "Please try again in 1.3s"
const msg = errCtx?.message ?? "";
const m = /(?:retry|try) again in ([\d.]+)s/i.exec(msg);
if (m && m[1]) {
const suggested = parseFloat(m[1]) * 1000;
if (!Number.isNaN(suggested)) {
- delayMs = suggested;
+ delayMs = Math.min(suggested, maxDelay);
}
}
+ // Cap delay to maxDelay
+ if (delayMs > maxDelay) {
+   delayMs = maxDelay;
+ }
log(
`OpenAI rate limit exceeded (attempt ${attempt}/${MAX_RETRIES}), retrying in ${Math.round(
delayMs,
@@ -907,7 +913,8 @@
return;
}

- const MAX_STREAM_RETRIES = 5;
+ // Max number of retries for streaming on rate-limit or transient errors
+ const MAX_STREAM_RETRIES = this.config.maxRetries ?? 5;
let streamRetryAttempt = 0;

// eslint-disable-next-line no-constant-condition
@@ -1037,8 +1044,14 @@
) {
streamRetryAttempt += 1;

- const waitMs =
-   RATE_LIMIT_RETRY_WAIT_MS * 2 ** (streamRetryAttempt - 1);
+ // Exponential backoff for stream retry
+ const baseStream = this.config.baseDelayMs ?? RATE_LIMIT_RETRY_WAIT_MS;
+ let waitMs = baseStream * 2 ** (streamRetryAttempt - 1);
+ const maxDelay = this.config.maxDelayMs ?? waitMs;
+ if (waitMs > maxDelay) {
+   waitMs = maxDelay;
+ }
log(
`OpenAI stream rate‑limited – retry ${streamRetryAttempt}/${MAX_STREAM_RETRIES} in ${waitMs} ms`,
);
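Both retry loops apply the same delay rule; factored out as a pure function it reads as below (a sketch for clarity only; the PR keeps this logic inline and defines no such helper):

```ts
// Exponential back-off from baseDelayMs, optionally overridden by a
// server-suggested retry time parsed from the error message, with the
// final wait always capped at maxDelayMs. Hypothetical helper, not in the PR.
function computeBackoffDelay(
  attempt: number, // 1-based attempt counter
  baseDelayMs: number,
  maxDelayMs: number,
  errorMessage?: string,
): number {
  let delayMs = baseDelayMs * 2 ** (attempt - 1);
  // Honor a suggestion such as "Please try again in 1.3s" when present.
  const m = /(?:retry|try) again in ([\d.]+)s/i.exec(errorMessage ?? "");
  if (m && m[1]) {
    const suggested = parseFloat(m[1]) * 1000;
    if (!Number.isNaN(suggested)) {
      delayMs = suggested;
    }
  }
  return Math.min(delayMs, maxDelayMs);
}

// computeBackoffDelay(3, 2500, 60000) === 10000
// computeBackoffDelay(1, 2500, 60000, "Please try again in 1.3s") === 1300
```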
8 changes: 8 additions & 0 deletions codex-cli/src/utils/config.ts
@@ -171,6 +171,14 @@ export type AppConfig = {

/** Enable the "flex-mode" processing mode for supported models (o3, o4-mini) */
flexMode?: boolean;
/** Max number of retry attempts for rate-limit and transient errors */
maxRetries?: number;
/** Base delay in milliseconds for exponential back-off */
baseDelayMs?: number;
/** Maximum back-off delay in milliseconds */
maxDelayMs?: number;
/** Print token usage report after each request */
tokenReport?: boolean;
providers?: Record<string, { name: string; baseURL: string; envKey: string }>;
history?: {
maxSize: number;
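For reference, the new optional fields slot into a config value like this (hand-written illustration; field names and defaults come from the PR, everything else is assumed):

```ts
import type { AppConfig } from "./utils/config";

// Illustrative only: the other AppConfig fields are elided via Partial.
const config: Partial<AppConfig> = {
  maxRetries: 5, // give up after five attempts
  baseDelayMs: 2500, // first wait; doubles on each retry
  maxDelayMs: 60_000, // ceiling for any single wait
  tokenReport: true, // print a per-turn token usage summary
};

export default config;
```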