diff --git a/.gitignore b/.gitignore
index 9a941be..0e235ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,4 @@ temp/
 
 # Jest cache
 .jest/
+.codex
diff --git a/CLAUDE.md b/CLAUDE.md
index 11ae7fa..36aa673 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -73,7 +73,7 @@ Sessions enable multi-turn conversations with Codex:
 2. **Subsequent requests with same sessionId**: Uses `codex exec resume <conversation-id>` (native Codex resume)
 3. **Fallback**: If no conversation ID exists, manually builds enhanced prompt from conversation history
 
-**Important**: When resuming sessions, `sandbox`, `fullAuto`, and `workingDirectory` parameters are NOT applied (Codex CLI limitation).
+**Important**: When resuming sessions, `sandbox` and `workingDirectory` are not applied. This server does forward `fullAuto` and `bypassApprovals` on resume, but those flags still depend on the installed Codex CLI supporting them.
 
 ### Streaming Progress
 
@@ -113,16 +113,18 @@ codex exec --model X --sandbox Y [-c config=value] --skip-git-repo-check "prompt
 
 **Resume mode** (existing conversations):
 ```
-codex exec --skip-git-repo-check -c model="X" -c model_reasoning_effort="Y" resume <conversation-id> "prompt"
+codex exec --skip-git-repo-check -c model="X" -c model_reasoning_effort="Y" [--full-auto] [--dangerously-bypass-approvals-and-sandbox] resume <conversation-id> "prompt"
 ```
 
 Note: Config flags (`-c`) must come BEFORE the subcommand (`exec` or `resume`).
 
 ### Environment Variables
 
-- `CODEX_DEFAULT_MODEL`: Default model for codex/review tools (default: `gpt-5.3-codex`)
+- `CODEX_DEFAULT_MODEL`: Default model for codex/review tools (default: `gpt-5.4`)
 - `CODEX_MCP_CALLBACK_URI`: Static MCP callback URI passed to Codex (override via tool arg)
 - `STRUCTURED_CONTENT_ENABLED`: Enable `structuredContent` in responses (default: false)
+- `CODEX_MCP_DEBUG_STARTUP`: Emit startup banner for debugging (default: false)
+- `CODEX_MCP_DEBUG_COMMANDS`: Emit command argv/stderr for debugging (default: false)
 
 ## TypeScript Configuration
 
diff --git a/README.md b/README.md
index d297ef1..e78f70e 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,8 @@ Use review with uncommitted true to review my local changes
 ```
 Use codex with model "o3" and reasoningEffort "high" for complex analysis
 Use codex with fullAuto true and sandbox "workspace-write" for automated tasks
+Use codex with bypassApprovals true only inside an externally sandboxed environment
+Use codex with timeoutMs 300000 for long-running tasks
 Use codex with callbackUri "http://localhost:1234/callback" for static callbacks
 Use codex to return structuredContent with threadId metadata when available
 ```
@@ -107,9 +109,17 @@ Use websearch with query "React Server Components" and searchDepth "full"
 - **[API Reference](docs/api-reference.md)** — Full tool parameters and response formats
 - **[Session Management](docs/session-management.md)** — How conversations work
 - **[Codex CLI Integration](docs/codex-cli-integration.md)** — Version compatibility and CLI details
+- **[Claude Code Lifecycle Investigation](docs/claude-code-lifecycle-investigation.md)** — Reproductions, fixes, and what Claude tears down itself
 
 ## Environment Variables
+- `CODEX_DEFAULT_MODEL`: Override the default model for `codex` and `review` (default `gpt-5.4`)
 - `CODEX_MCP_CALLBACK_URI`: Static MCP callback URI passed to Codex when set (overridden by `callbackUri` tool arg)
+- `CODEX_TOOL_TIMEOUT_MS`: Timeout in milliseconds for serialized tool calls (default `120000`)
+- `STRUCTURED_CONTENT_ENABLED`: Emit `structuredContent` alongside text metadata when truthy
+- `CODEX_MCP_DEBUG_STARTUP`: Set to `1`, `true`, `yes`, or `on` to emit the startup banner on stderr for debugging
+- `CODEX_MCP_DEBUG_COMMANDS`: Set to `1`, `true`, `yes`, or `on` to emit command argv and stderr for debugging
+
+`timeoutMs` on the `codex` tool overrides `CODEX_TOOL_TIMEOUT_MS` for a single request.
 
 ## Development
 
diff --git a/docs/TODO.md b/docs/TODO.md
index 759943f..9472624 100644
--- a/docs/TODO.md
+++ b/docs/TODO.md
@@ -1,5 +1,9 @@
 # Codex MCP Server - TODO
 
+This file is backlog-oriented. Do not treat the "Implemented" section below as
+the current runtime source of truth; check `README.md`, `docs/api-reference.md`,
+and the code first.
+
 ## Features from Codex CLI v0.98.0
 
 These features were introduced/stabilized in Codex CLI v0.98.0 but are not yet implemented in this MCP server.
@@ -50,11 +54,11 @@ These features were introduced/stabilized in Codex CLI v0.98.0 but are not yet i
 
 ## Implemented in v1.3.4+
 
-### ✅ GPT-5.3-Codex Model
+### ✅ GPT-5.4 Default Model
 - **Status**: Implemented
 - **Description**: New default model
 - **Changes**:
-  - Updated `DEFAULT_CODEX_MODEL` constant to `'gpt-5.3-codex'`
+  - Updated `DEFAULT_CODEX_MODEL` constant to `'gpt-5.4'`
   - Updated tool definitions to reflect new default
   - Single source of truth for model updates
 
diff --git a/docs/api-reference.md b/docs/api-reference.md
index 666a298..0131efc 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -1,442 +1,157 @@
 # API Reference
 
-## Overview
-Complete reference for the Codex MCP Server tools and interfaces.
+Current reference for the public MCP surface exposed by `codex-mcp-server`.
 
-This server implements the **MCP 2025-11-25 specification**, including tool annotations and progress notifications.
+## Requirements
 
-## Installation Options
+- Codex CLI `v0.75.0+`
+- An authenticated Codex CLI (`codex login --api-key "..."`)
+- For resumed-session `fullAuto` or `bypassApprovals`, use a Codex CLI build whose `codex exec resume --help` lists those flags
 
-### Claude Code
-```bash
-claude mcp add codex-cli -- npx -y codex-mcp-server
-```
-
-### Claude Desktop
-Add to your configuration file:
-
-**macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
-
-**Windows:** `%APPDATA%/Claude/claude_desktop_config.json`
-
-```json
-{
-  "mcpServers": {
-    "codex-cli": {
-      "command": "npx",
-      "args": ["-y", "codex-mcp-server"]
-    }
-  }
-}
-```
-
-## MCP Protocol Features
-
-### Tool Annotations
-All tools include annotations that provide hints to MCP clients about tool behavior:
-
-| Annotation | Type | Description |
-|------------|------|-------------|
-| `title` | string | Human-readable tool name |
-| `readOnlyHint` | boolean | Tool doesn't modify state (safe to call) |
-| `destructiveHint` | boolean | Tool may modify files or external state |
-| `idempotentHint` | boolean | Multiple calls produce same result |
-| `openWorldHint` | boolean | Tool interacts with external services (network, APIs) |
-
-#### Tool Annotation Matrix
-| Tool | `title` | `readOnlyHint` | `destructiveHint` | `idempotentHint` | `openWorldHint` |
-|------|---------|---------------|-------------------|------------------|-----------------|
-| `codex` | Execute Codex CLI | `false` | `true` | `false` | `true` |
-| `review` | Code Review | `true` | `false` | `true` | `true` |
-| `ping` | Ping Server | `true` | `false` | `true` | `false` |
-| `help` | Get Help | `true` | `false` | `true` | `false` |
-| `listSessions` | List Sessions | `true` | `false` | `true` | `false` |
-
-### Progress Notifications
-For long-running operations, the server sends `notifications/progress` messages when the client includes a `progressToken` in the request `_meta`.
-
-**Request with Progress Token:**
-```json
-{
-  "jsonrpc": "2.0",
-  "id": 1,
-  "method": "tools/call",
-  "params": {
-    "name": "codex",
-    "arguments": { "prompt": "Analyze this codebase" },
-    "_meta": { "progressToken": "unique-token-123" }
-  }
-}
-```
-
-**Progress Notification (sent during execution):**
-```json
-{
-  "jsonrpc": "2.0",
-  "method": "notifications/progress",
-  "params": {
-    "progressToken": "unique-token-123",
-    "progress": 1,
-    "message": "Processing output from Codex..."
-  }
-}
-```
-
-**Supported Tools:** `codex`, `review` (long-running operations)
-
-> **Note:** Progress notifications are streamed in real-time from CLI stdout/stderr. Client support for displaying these notifications varies.
-
-## Tools
-
-### `codex` - AI Coding Assistant
-
-Execute Codex CLI with advanced session management and model control.
-
-**Annotations:** `readOnlyHint: false`, `destructiveHint: true`, `idempotentHint: false`, `openWorldHint: true`
-
-#### Parameters
-
-| Parameter | Type | Required | Default | Description |
-|-----------|------|----------|---------|-------------|
-| `prompt` | string | ✅ | - | The coding task, question, or analysis request |
-| `sessionId` | string | ❌ | - | Session ID for conversational context |
-| `resetSession` | boolean | ❌ | `false` | Reset session history before processing |
-| `model` | string | ❌ | `gpt-5.2-codex` | Model to use for processing |
-| `reasoningEffort` | enum | ❌ | - | Control reasoning depth |
-| `sandbox` | enum | ❌ | - | Sandbox policy: `read-only`, `workspace-write`, `danger-full-access` |
-| `fullAuto` | boolean | ❌ | `false` | Enable full-auto mode (sandboxed automatic execution) |
-| `workingDirectory` | string | ❌ | - | Working directory for the agent |
-| `callbackUri` | string | ❌ | - | Static MCP callback URI passed via env to Codex |
-
-#### Model Options
-- `gpt-5.2-codex` (default) - Latest specialized coding model optimized for agentic tasks
-- `gpt-5.1-codex` - Previous coding model version
-- `gpt-5.1-codex-max` - Enhanced coding model for complex tasks
-- `gpt-5-codex` - Base GPT-5 coding model
-- `gpt-4o` - Fast multimodal model
-- `gpt-4` - Advanced reasoning capabilities
-
-#### Reasoning Effort Levels
-- `low` - Quick responses, minimal processing
-- `medium` - Balanced quality and speed
-- `high` - Thorough analysis and comprehensive responses
-
-#### Response Format
-```typescript
-interface CodexToolResponse {
-  content: Array<{
-    type: 'text';
-    text: string;
-    _meta?: {
-      threadId?: string;
-      model?: string;
-      sessionId?: string;
-      callbackUri?: string;
-    };
-  }>;
-  structuredContent?: {
-    threadId?: string;
-    model?: string;
-    sessionId?: string;
-    callbackUri?: string;
-  };
-}
-```
-
-**Note:** `structuredContent` is only emitted when `STRUCTURED_CONTENT_ENABLED` is set to a truthy value (`1`, `true`, `yes`, `on`). It is **disabled by default**. `_meta` remains available in `content` for Claude Code compatibility.
-
-#### Output Schema (structuredContent)
-The `codex` tool advertises an `outputSchema` that describes the structure of `structuredContent` returned in tool results when enabled.
-```json
-{
-  "type": "object",
-  "properties": {
-    "threadId": { "type": "string" }
-  }
-}
-```
-
-#### Examples
-
-**Basic Usage:**
-```json
-{
-  "prompt": "Explain this Python function and suggest improvements"
-}
-```
-
-**With Model Selection:**
-```json
-{
-  "prompt": "Perform complex architectural analysis",
-  "model": "gpt-4",
-  "reasoningEffort": "high"
-}
-```
-
-**Session Management:**
-```json
-{
-  "prompt": "Continue our previous discussion",
-  "sessionId": "my-coding-session"
-}
-```
-
-**Reset Session:**
-```json
-{
-  "prompt": "Start fresh analysis",
-  "sessionId": "my-coding-session",
-  "resetSession": true
-}
-```
+## Tool Matrix
 
----
+| Tool | Purpose | Progress-capable | Spawns Codex CLI |
+| --- | --- | --- | --- |
+| `codex` | General Codex execution with sessions and execution controls | Yes | Yes |
+| `review` | Repository review against working tree, base branch, or commit | Yes | Yes |
+| `websearch` | Search-backed Codex run via `codex --search exec` | Yes | Yes |
+| `help` | Return `codex --help` output | Yes | Yes |
+| `listSessions` | List active in-memory sessions | No | No |
+| `ping` | Echo/health check | No | No |
 
-### `review` - Code Review
+## `codex`
 
-Run AI-powered code reviews against your repository using Codex CLI.
+Execute Codex CLI in non-interactive mode.
 
-**Annotations:** `readOnlyHint: true`, `destructiveHint: false`, `idempotentHint: true`, `openWorldHint: true`
+| Parameter | Type | Required | Default | Notes |
+| --- | --- | --- | --- | --- |
+| `prompt` | string | Yes | - | Main Codex task |
+| `sessionId` | string | No | - | Caller-supplied session key, 1-256 chars, `[A-Za-z0-9_-]+` |
+| `resetSession` | boolean | No | `false` | Clears stored server-side history before this request |
+| `model` | string | No | `gpt-5.4` | Free-form Codex model string |
+| `reasoningEffort` | enum | No | - | `none`, `minimal`, `low`, `medium`, `high`, `xhigh` |
+| `sandbox` | enum | No | - | `read-only`, `workspace-write`, `danger-full-access` |
+| `fullAuto` | boolean | No | `false` | Sandboxed automatic execution without approval prompts |
+| `bypassApprovals` | boolean | No | `false` | Disables prompts and sandboxing entirely; use only in externally sandboxed environments |
+| `workingDirectory` | string | No | - | Passed via `-C` for new executions |
+| `callbackUri` | string | No | - | Overrides `CODEX_MCP_CALLBACK_URI` for this request |
+| `timeoutMs` | integer | No | `CODEX_TOOL_TIMEOUT_MS` or `120000` | Per-request override for this tool only |
 
-#### Parameters
+Important behavior:
 
-| Parameter | Type | Required | Default | Description |
-|-----------|------|----------|---------|-------------|
-| `prompt` | string | ❌ | - | Custom review instructions or focus areas |
-| `uncommitted` | boolean | ❌ | `false` | Review staged, unstaged, and untracked changes |
-| `base` | string | ❌ | - | Review changes against a specific base branch |
-| `commit` | string | ❌ | - | Review changes introduced by a specific commit SHA |
-| `title` | string | ❌ | - | Title to display in the review summary |
-| `model` | string | ❌ | `gpt-5.2-codex` | Model to use for the review (passed via `-c model="..."`) |
-| `workingDirectory` | string | ❌ | - | Working directory to run the review in (passed via `-C`) |
+- `bypassApprovals` is mutually exclusive with `sandbox`.
+- `bypassApprovals` is mutually exclusive with `fullAuto`.
+- On resumed sessions, `sandbox` and `workingDirectory` are not applied.
+- On resumed sessions, this server still forwards `fullAuto` and `bypassApprovals` when requested. If your local Codex CLI does not support those resume flags, the CLI itself will reject the command.
 
-#### Examples
+Response behavior:
 
-**Review Uncommitted Changes:**
-```json
-{
-  "uncommitted": true
-}
-```
+- The primary text result is returned in `content[0].text`.
+- `content[0]._meta` includes `model`, plus `sessionId`, `callbackUri`, and `threadId` when available.
+- `structuredContent` is only included when `STRUCTURED_CONTENT_ENABLED` is truthy.
 
-**Review Against Main Branch:**
-```json
-{
-  "base": "main",
-  "prompt": "Focus on security vulnerabilities"
-}
-```
+Example:
 
-**Review Specific Commit:**
 ```json
 {
-  "commit": "abc123def",
-  "title": "Security Audit"
+  "prompt": "Review this refactor for race conditions",
+  "sessionId": "race-audit",
+  "model": "gpt-5.4",
+  "reasoningEffort": "high",
+  "timeoutMs": 300000
 }
 ```
 
-#### Response Format
-```typescript
-interface ReviewToolResponse {
-  content: Array<{
-    type: 'text';
-    text: string; // Review output from Codex
-    _meta?: {
-      model: string;
-      base?: string;
-      commit?: string;
-    };
-  }>;
-  structuredContent?: {
-    model: string;
-    base?: string;
-    commit?: string;
-  };
-}
-```
+## `review`
 
-**Note:** `structuredContent` is only emitted when `STRUCTURED_CONTENT_ENABLED` is set to a truthy value (`1`, `true`, `yes`, `on`). It is **disabled by default**. `_meta` remains available in `content` for Claude Code compatibility.
+Run `codex review`.
 
----
+| Parameter | Type | Required | Default | Notes |
+| --- | --- | --- | --- | --- |
+| `prompt` | string | No | - | Extra review instructions |
+| `uncommitted` | boolean | No | `false` | Review staged, unstaged, and untracked changes |
+| `base` | string | No | - | Review against a branch or ref |
+| `commit` | string | No | - | Review a single commit |
+| `title` | string | No | - | Optional review title |
+| `model` | string | No | `gpt-5.4` | Passed via `-c model="..."` |
+| `workingDirectory` | string | No | - | Passed via global `-C` and spawn `cwd` |
 
-### `listSessions` - Session Management
+Notes:
 
-List all active conversation sessions with metadata.
+- `prompt` cannot be combined with `uncommitted: true`.
+- Global `CODEX_TOOL_TIMEOUT_MS` still applies.
+- Timed-out reviews now honor abort and unblock the serialized queue promptly.
 
-**Annotations:** `readOnlyHint: true`, `destructiveHint: false`, `idempotentHint: true`, `openWorldHint: false`
+## `websearch`
 
-#### Parameters
-No parameters required.
+Run `codex --search exec`.
 
-#### Response Format
-```typescript
-interface SessionInfo {
-  id: string;
-  createdAt: string; // ISO 8601 timestamp
-  lastAccessedAt: string; // ISO 8601 timestamp
-  turnCount: number;
-}
-```
+| Parameter | Type | Required | Default | Notes |
+| --- | --- | --- | --- | --- |
+| `query` | string | Yes | - | Search request |
+| `numResults` | integer | No | `10` | `1` through `50` |
+| `searchDepth` | enum | No | `basic` | `basic` or `full` |
 
-#### Example Response
-```json
-{
-  "content": [{
-    "type": "text",
-    "text": "[\n  {\n    \"id\": \"abc-123-def\",\n    \"createdAt\": \"2025-01-01T12:00:00.000Z\",\n    \"lastAccessedAt\": \"2025-01-01T12:30:00.000Z\",\n    \"turnCount\": 5\n  }\n]"
-  }]
-}
-```
+Notes:
 
----
+- Global `CODEX_TOOL_TIMEOUT_MS` applies.
+- Timed-out web searches honor abort and unblock the serialized queue promptly.
 
-### `ping` - Connection Test
+## `help`
 
-Test MCP server connection and responsiveness.
+Returns `codex --help`.
 
-**Annotations:** `readOnlyHint: true`, `destructiveHint: false`, `idempotentHint: true`, `openWorldHint: false`
+- No arguments
+- Global `CODEX_TOOL_TIMEOUT_MS` applies
+- Timed-out help calls honor abort and unblock the serialized queue promptly
 
-#### Parameters
+## `listSessions`
 
-| Parameter | Type | Required | Default | Description |
-|-----------|------|----------|---------|-------------|
-| `message` | string | ❌ | `pong` | Message to echo back |
+Returns active in-memory session metadata for the current server process.
 
-#### Example
-```json
-{
-  "message": "Hello, server!"
-}
-```
+Example payload shape:
 
-#### Response
 ```json
-{
-  "content": [{
-    "type": "text",
-    "text": "Hello, server!"
-  }]
-}
-```
-
----
-
-### `help` - Codex CLI Help
-
-Get information about Codex CLI capabilities and commands.
-
-**Annotations:** `readOnlyHint: true`, `destructiveHint: false`, `idempotentHint: true`, `openWorldHint: false`
-
-#### Parameters
-No parameters required.
-
-#### Response
-Returns the output of `codex --help` command, providing comprehensive CLI documentation.
-
-## Session Management
-
-### Session Lifecycle
-
-1. **Creation**: Sessions are created automatically or explicitly via `sessionId`
-2. **Activity**: Each interaction updates `lastAccessedAt` timestamp
-3. **Persistence**: Sessions persist for 24 hours of inactivity
-4. **Cleanup**: Automatic removal of expired sessions
-5. **Limits**: Maximum 100 concurrent sessions
-
-### Session Data Structure
-
-```typescript
-interface SessionData {
-  id: string;                    // UUID-based session identifier
-  createdAt: Date;              // Session creation timestamp
-  lastAccessedAt: Date;         // Last interaction timestamp
-  turns: ConversationTurn[];    // Conversation history
-  codexConversationId?: string; // Native Codex conversation ID
-}
-
-interface ConversationTurn {
-  prompt: string;    // User's original prompt
-  response: string;  // Codex response
-  timestamp: Date;   // Turn timestamp
-}
-```
-
-### Resume Functionality
-
-The server leverages Codex CLI v0.50.0+ native resume functionality:
-
-1. **Conversation ID Extraction**: Automatically captures conversation IDs from Codex output (supports both "session id" and "conversation id" formats)
-2. **Native Resume**: Uses `codex exec resume <conversation-id>` for optimal continuity
-3. **Fallback Context**: Manual context building when native resume unavailable
-4. **Seamless Integration**: Transparent to end users
-
-## Error Handling
-
-### Error Response Format
-```typescript
-interface ErrorResponse {
-  content: Array<{
-    type: 'text';
-    text: string; // Error description
-  }>;
-  isError: true;
-}
+[
+  {
+    "id": "refactor-thread",
+    "createdAt": "2026-04-26T08:00:00.000Z",
+    "lastAccessedAt": "2026-04-26T08:15:00.000Z",
+    "turnCount": 3
+  }
+]
 ```
 
-### Common Error Scenarios
-
-#### Authentication Errors
-- **Cause**: Codex CLI not authenticated
-- **Message**: "Authentication failed: Please run `codex login`"
-- **Resolution**: Run `codex login --api-key "your-key"`
-
-#### Model Errors
-- **Cause**: Invalid or unavailable model specified
-- **Message**: "Invalid model: <model-name>"
-- **Resolution**: Use supported model or omit for default
-
-#### Session Errors
-- **Cause**: Corrupted session data or invalid session ID
-- **Behavior**: Graceful degradation, continues with fresh context
-- **Impact**: Minimal - system auto-recovers
-
-#### CLI Errors
-- **Cause**: Codex CLI not installed or network issues
-- **Message**: "Failed to execute codex command"
-- **Resolution**: Install CLI and check network connectivity
-
-## Performance Considerations
+## `ping`
 
-### Memory Management
-- **Session TTL**: 24-hour automatic cleanup
-- **Session Limits**: Maximum 100 concurrent sessions
-- **Context Optimization**: Recent turns only (last 2) for fallback context
+Echo/health check.
 
-### Response Optimization
-- **Model Selection**: Default `gpt-5.2-codex` optimized for agentic coding
-- **Reasoning Control**: Adjust effort based on task complexity
-- **Native Resume**: Preferred over manual context building
+| Parameter | Type | Required | Default |
+| --- | --- | --- | --- |
+| `message` | string | No | `pong` |
 
-### Scalability
-- **Stateless Design**: Core functionality works without sessions
-- **Graceful Degradation**: Continues operation despite component failures
-- **Resource Cleanup**: Automatic management of memory and storage
+## Sessions
 
-## Configuration
+- Sessions are server-process local and stored only in memory.
+- A session is created on first `codex` call that supplies `sessionId`.
+- The server stores recent turns plus the Codex conversation ID when it can extract one from CLI output.
+- Sessions expire after 24 hours of inactivity.
+- The store keeps at most 100 sessions.
+- If the server restarts, sessions are lost.
 
-### Environment Variables
-None required - authentication handled by Codex CLI.
+## Environment Variables
 
-Optional:
-- `CODEX_MCP_CALLBACK_URI`: Static MCP callback URI passed to Codex CLI when invoking tools.
+| Variable | Purpose | Default |
+| --- | --- | --- |
+| `CODEX_DEFAULT_MODEL` | Override the default model for `codex` and `review` | `gpt-5.4` |
+| `CODEX_MCP_CALLBACK_URI` | Default callback URI for `codex` requests | unset |
+| `CODEX_TOOL_TIMEOUT_MS` | Timeout for serialized tool calls | `120000` |
+| `STRUCTURED_CONTENT_ENABLED` | Emit `structuredContent` alongside `_meta` | unset / false |
+| `CODEX_MCP_DEBUG_STARTUP` | Emit a startup banner to stderr | unset / false |
+| `CODEX_MCP_DEBUG_COMMANDS` | Emit command argv and stderr to stderr | unset / false |
 
-### Codex CLI Requirements
-- **Version**: 0.36.0 or later
-- **Authentication**: `codex login --api-key "your-key"`
-- **Verification**: `codex --help` should execute successfully
+## Current Constraints
 
-### Optional Configuration
-- **CODEX_HOME**: Custom directory for Codex CLI configuration
-- **Session Limits**: Configurable in server implementation (default: 100)
-- **TTL Settings**: Configurable session expiration (default: 24 hours)
\ No newline at end of file
+- Calls are serialized through one queue to avoid stdio races.
+- `timeoutMs` is only a `codex` tool parameter; other tools use the global timeout.
+- `sandbox` and `workingDirectory` are not applied on resumed Codex sessions.
diff --git a/docs/architecture-and-fixes.md b/docs/architecture-and-fixes.md
new file mode 100644
index 0000000..5653d6f
--- /dev/null
+++ b/docs/architecture-and-fixes.md
@@ -0,0 +1,138 @@
+# codex-mcp-server: Current Architecture and Hardening Notes
+
+Current-state reference for the server architecture and the lifecycle fixes
+landed so far.
+
+## Source Layout
+
+```text
+src/
+├── index.ts
+├── server.ts
+├── runtime-config.ts
+├── types.ts
+├── errors.ts
+├── tools/
+│   ├── definitions.ts
+│   └── handlers.ts
+├── utils/
+│   └── command.ts
+└── session/
+    └── storage.ts
+```
+
+## Core Execution Model
+
+1. The MCP stdio transport receives `tools/call`.
+2. `server.ts` serializes every call through `callQueue`.
+3. A per-request `AbortController` is created.
+4. `withTimeout()` races the tool handler against the configured timeout.
+5. On timeout, the controller aborts and the queue slot remains blocked until
+   the underlying operation actually settles.
+
+This serialization is intentional. It avoids the original stdio race where two
+concurrent child processes could corrupt the JSON-RPC stream.
+
+## Command Execution Model
+
+`utils/command.ts` is the shared spawn path for command-backed tools.
+
+Current behavior:
+
+- closes child stdin immediately with `child.stdin?.end()`
+- creates a detached process group when abort support is in use
+- sends `SIGTERM`, then `SIGKILL` after a short grace period
+- buffers stdout and stderr up to 10MB
+- treats `stdout` or `stderr` output as a usable command result even if the exit
+  code is non-zero, because Codex often writes primary output to stderr
+- keeps command argv and stderr logging disabled by default
+
+## Current Tool Coverage
+
+Command-backed tools:
+
+- `codex`
+- `review`
+- `websearch`
+- `help`
+
+Non-command-backed tools:
+
+- `ping`
+- `listSessions`
+
+All command-backed tools now honor the shared abort path.
+
+## Hardening Fixes Already Landed
+
+### Concurrent-call serialization
+
+`callQueue` ensures only one `tools/call` handler runs at a time.
+
+### Queue does not release before child exit
+
+The queue slot stays blocked until the underlying operation settles, including
+timed-out calls.
+
+### Non-interactive stdin handling
+
+Spawned Codex children get EOF on stdin immediately so they do not hang waiting
+for additional input.
+
+### Timeout recovery
+
+- global timeout via `CODEX_TOOL_TIMEOUT_MS`
+- per-call timeout override via `timeoutMs` on the `codex` tool
+- shared abort path now applies to `codex`, `review`, `websearch`, and `help`
+
+### Startup and command logging cleanup
+
+- startup banner is gated by `CODEX_MCP_DEBUG_STARTUP`
+- command argv/stderr logging is gated by `CODEX_MCP_DEBUG_COMMANDS`
+
+### Default model drift fix
+
+Default model is `gpt-5.4`, with `CODEX_DEFAULT_MODEL` as the override.
+
+### `bypassApprovals` contract completion
+
+- exposed in the MCP tool definition
+- forwarded on new and resumed Codex executions
+- rejected when combined with `sandbox`
+- rejected when combined with `fullAuto`
+
+## Session Model
+
+- sessions are in-memory only
+- caller provides `sessionId`
+- server stores recent turns plus the Codex conversation ID when available
+- native Codex resume is preferred
+- fallback context reconstruction uses recent turns when native resume is not
+  available
+
+## Current Environment Variables
+
+| Variable | Purpose | Default |
+| --- | --- | --- |
+| `CODEX_DEFAULT_MODEL` | Override the default model for `codex` and `review` | `gpt-5.4` |
+| `CODEX_MCP_CALLBACK_URI` | Default callback URI for `codex` requests | unset |
+| `CODEX_TOOL_TIMEOUT_MS` | Timeout for serialized tool calls | `120000` |
+| `STRUCTURED_CONTENT_ENABLED` | Emit `structuredContent` | unset / false |
+| `CODEX_MCP_DEBUG_STARTUP` | Emit startup banner | unset / false |
+| `CODEX_MCP_DEBUG_COMMANDS` | Emit command argv/stderr | unset / false |
+
+## Intentional Constraints
+
+- The queue still serializes all tool calls, not only Codex-heavy ones.
+- `timeoutMs` is only a `codex` tool argument.
+- `sandbox` and `workingDirectory` are not applied on resumed sessions.
+- Session state is not persisted across server restarts.
+
+## Verification Gate
+
+Use the current repo state, not historical green claims:
+
+```bash
+npm run build
+npm test -- --runInBand
+```
diff --git a/docs/claude-code-lifecycle-investigation.md b/docs/claude-code-lifecycle-investigation.md
new file mode 100644
index 0000000..353d067
--- /dev/null
+++ b/docs/claude-code-lifecycle-investigation.md
@@ -0,0 +1,224 @@
+# Claude Code Lifecycle Investigation
+
+Date: 2026-04-25
+
+This document captures the end-to-end investigation of `codex-mcp-server`
+behavior under Claude Code after the following fixes:
+
+- `ac66ca1` `fix: serialize concurrent tools/call requests to prevent stdio race`
+- `358c106` `feat: add per-call timeout to prevent queue starvation on hung API calls`
+- `a1cd967` `fix: block queue until timed out codex child exits`
+- follow-up runtime cleanup on 2026-04-25:
+  - startup banner moved behind `CODEX_MCP_DEBUG_STARTUP`
+  - server version now resolves from `package.json`
+- follow-up timeout override on 2026-04-25:
+  - `timeoutMs` added to the `codex` tool schema and MCP definition
+  - per-call timeout now overrides `CODEX_TOOL_TIMEOUT_MS`
+- follow-up lifecycle hardening on 2026-04-26:
+  - `review`, `websearch`, and `help` now honor the shared abort path
+  - command argv/stderr logging is now gated behind `CODEX_MCP_DEBUG_COMMANDS`
+  - `bypassApprovals` is exposed and validated end-to-end
+
+## Executive Summary
+
+The current build does **not** reproduce the original "first call works, second
+call hangs" failure under Claude Code.
+
+What is fixed:
+
+- Concurrent `tools/call` requests are serialized.
+- A timed-out Codex child is aborted and the queue remains blocked until the
+  child actually exits.
+- Claude can make a second `mcp__codex__codex` call after a timeout in the same
+  session.
+
+What is not a server bug:
+
+- Claude Code tears down stdio MCP servers at the end of `claude -p` sessions.
+- Resuming a Claude conversation is path-scoped by Claude; the same session ID
+  may fail outside the original local path.
+
+## Scenario Matrix
+
+| Scenario | Command shape | Result | Notes |
+| --- | --- | --- | --- |
+| Direct stdio lifecycle tests | `npm test -- --runInBand src/__tests__/mcp-lifecycle.test.ts --runTestsByPath` | Pass | Covers sequential calls, concurrent serialization, codex timeout recovery, and review timeout recovery |
+| Claude print, single live Codex call | `claude -p --allowedTools mcp__codex__codex --tools "" ...` | Pass in 20s | Returned `LIVE_OK` |
+| Claude print, two sequential live Codex calls | Same as above, but force two tool calls | Pass in 34s | Returned `FIRST_OK` then `SECOND_OK` |
+| Claude print, resumed session, single live Codex call | `claude -p --resume <session-id> ...` from original local path | Pass in 30s | Returned `RESUMED_LIVE_OK` |
+| Claude interactive resume | `claude --resume <session-id>` | Pass | Session opened normally and could invoke Codex |
+| Claude print, strict stub, two fast calls | Temp MCP config with stub `codex` binary | Pass in 11s | Returned `stub stdout` twice |
+| Claude print, strict stub, timeout then recovery | Temp MCP config with `CODEX_TOOL_TIMEOUT_MS=100` | Pass in 11s | First call timed out, second call still returned `stub stdout` |
+| Claude print, strict stub, per-call timeout override | Temp MCP config with `CODEX_TOOL_TIMEOUT_MS=9000` and request `timeoutMs=100` | Pass in 15s | First call timed out at 100ms, second call still returned `stub stdout` |
+
+## Key Evidence
+
+### 1. Claude does not lose the second live call anymore
+
+In the two-call live probe, Claude debug showed two distinct successful tool
+dispatches:
+
+- first `mcp__codex__codex` call started and ended successfully
+- second `mcp__codex__codex` call started and ended successfully
+
+After that, Claude sent `SIGINT` and closed the stdio connection cleanly. The
+server did not crash between the two calls.
+
+### 2. Timeout recovery now works under Claude itself
+
+Using a strict temporary MCP config and a stub `codex` executable that:
+
+- sleeps for 5 seconds for `slow-timeout`
+- fails with `Not connected` on overlap
+- returns quickly otherwise
+
+Claude produced:
+
+```text
+Error in tool "codex": Tool call timed out after 100ms
+stub stdout
+```
+
+That is the critical regression check. Before `a1cd967`, the timeout response
+could be returned before the killed child had actually released its resources,
+so the next queued call could still fail or wedge.
+
+### 3. Claude still tears the server down after the session
+
+Successful Claude debug logs consistently show this pattern after the final
+response:
+
+- Claude sends `SIGINT` to the MCP server process
+- the `codex` stdio connection closes cleanly
+- Claude records `MCP server process exited cleanly`
+
+That supports the earlier lifecycle finding: Claude owns MCP server lifetime for
+these print sessions.
+
+## Root Cause Breakdown
+
+### Bug 1: Concurrent request race
+
+Cause:
+
+- the `CallToolRequestSchema` handler allowed multiple `handler.execute()`
+  calls to run at once
+- concurrent child processes raced on the same Codex stdio path
+
+Fix:
+
+- serialize `tools/call` execution through `callQueue`
+
+Status:
+
+- fixed by `ac66ca1`
+
+### Bug 2: Timeout released the queue too early
+
+Cause:
+
+- the server returned a timeout error to Claude
+- but the underlying `handler.execute()` was still unwinding
+- the next queued request could start before the timed-out child had fully
+  exited
+
+Fix:
+
+- propagate an `AbortSignal`
+- terminate the Codex child on timeout
+- keep the queue blocked until the aborted operation actually settles
+
+Status:
+
+- fully fixed by `a1cd967`
+
+### Non-bug: Claude session teardown
+
+Observed behavior:
+
+- at the end of successful `claude -p` runs, Claude sends `SIGINT`
+- the MCP stdio connection closes cleanly
+
+Interpretation:
+
+- this is expected client lifecycle behavior
+- it is not evidence that `codex-mcp-server` crashed after one call
+
+## What This Rules Out
+
+- The current server does not reproduce a "one successful call, then dead
+  server" failure under Claude Code.
+- The current server does not jam the queue after a timed-out Codex child in the
+  tested recovery path.
+- The resumed Claude session itself is not inherently broken.
+
+## Remaining Rough Edges
+
+These are real follow-up candidates, but they are not the root cause of the
+hang investigated here.
+
+### Timeout tuning
+
+`CODEX_TOOL_TIMEOUT_MS` exists and works, and callers can now override it per
+request with `timeoutMs`. The default `120000` can still be too short for large
+real Codex tasks.
+
+Suggested cleanup:
+
+- document the env var and per-call override clearly
+- consider revisiting the default timeout once more real Codex latency data is available
+
+## Practical Conclusion
+
+At this point the server-side lifecycle and queue bugs are fixed well enough for
+Claude-driven use:
+
+- single live calls work
+- sequential live calls work
+- resumed-session live calls work
+- timeout followed by another call works
+- startup debug noise is off by default
+- reported server version matches the package version
+
+If a user still sees a hang now, the next place to investigate is no longer the
+original stdio race. It is more likely to be one of:
+
+- a real long-running Codex API call
+- Claude-side tool selection or control-flow behavior
+- an environment-specific issue outside the fixed queue/timeout path
+
+## Final Verification
+
+Final verification after the startup-noise and version-drift cleanup:
+
+### Repo Checks
+
+- `npm run lint` — PASS
+- `npm run format:check` — PASS
+- `npm run build` — PASS
+- `npm test -- --runInBand` — PASS (`13/13` suites, `88/88` tests)
+
+### Focused Regression Checks
+
+- `npm test -- --runInBand src/__tests__/runtime-config.test.ts src/__tests__/startup-logging.test.ts src/__tests__/mcp-lifecycle.test.ts --runTestsByPath`
+  - PASS (`3/3` suites, `8/8` tests)
+
+### Claude-Facing Verification
+
+A final strict stub-backed Claude probe was used to avoid live Codex account
+usage-limit noise while still exercising the Claude MCP client path.
+
+Observed result:
+
+- Claude received `stub stdout`
+- Claude debug showed `Calling MCP tool: codex`
+- Claude debug showed `Tool 'codex' completed successfully`
+- the old startup line `Server stderr: codex-mcp-server started successfully`
+  was absent
+- a strict stub-backed override probe returned:
+  - `Error in tool "codex": Tool call timed out after 100ms`
+  - `stub stdout`
+
+This confirms the final build still works through Claude and the startup banner
+no longer pollutes Claude's debug log by default, and that `timeoutMs`
+overrides the global timeout without jamming the queue.
diff --git a/docs/codex-cli-integration.md b/docs/codex-cli-integration.md
index a50dc07..95860cc 100644
--- a/docs/codex-cli-integration.md
+++ b/docs/codex-cli-integration.md
@@ -1,331 +1,108 @@
-# Codex CLI v0.75.0+ Integration Guide
+# Codex CLI Integration
 
-## Overview
-This document outlines the integration with OpenAI Codex CLI v0.75.0+, highlighting breaking changes, new features, and implementation details for the MCP server wrapper.
+Current integration notes for `codex-mcp-server`.
 
-## Version Compatibility
+## Supported Baseline
 
-### Recommended Version: v0.75.0+
-This MCP server is optimized for **codex CLI v0.75.0 or later** for full feature support.
+- Recommended minimum: Codex CLI `v0.75.0+`
+- Authentication: `codex login --api-key "..."`
+- This server expects modern `codex exec resume`, `codex review`, sandbox, and
+  working-directory support
 
-**Version History:**
-- **v0.75.0**: Added `codex review` command, sandbox modes, full-auto mode
-- **v0.74.0**: Introduced `gpt-5.2-codex` model
-- **v0.50.0**: Introduced `--skip-git-repo-check` flag, removed `--reasoning-effort` flag
-- **v0.36.0-v0.49.x**: Not compatible with this MCP server version (use older MCP releases)
+If you rely on resumed-session `fullAuto` or `bypassApprovals`, verify your
+local CLI supports them with:
 
-## Breaking Changes ⚠️
-
-### v0.75.0 Changes (Current)
-1. **Resume command moved under exec**
-   - **Old**: `codex resume <id>`
-   - **New**: `codex exec resume <id>`
-   - Impact: MCP server updated to use new command structure
-
-### v0.50.0 Changes
-1. **`--skip-git-repo-check` flag now required**
-   - Required when running outside git repositories or in untrusted directories
-   - Prevents "Not inside a trusted directory" errors
-   - Impact: All exec-based MCP server commands include this flag
-
-2. **`--reasoning-effort` flag changed**
-   - The standalone flag was removed in codex CLI v0.50.0
-   - Now passed via `-c model_reasoning_effort=<level>` config flag
-   - Impact: MCP server updated to use new config-based approach
-
-### v0.36.0 Changes (Historical)
-1. **Authentication Method Change**
-   - **Old Method**: `OPENAI_API_KEY` environment variable
-   - **New Method**: `codex login --api-key "your-api-key"`
-   - **Storage**: Credentials now stored in `CODEX_HOME/auth.json`
-   - **Impact**: Users must re-authenticate using the new login command
-
-## New Features Implemented
-
-### 1. Code Review (v0.75.0+)
-- **Command**: `codex review` (top-level subcommand)
-- **CLI Flags**:
-  - `--uncommitted`: Review staged, unstaged, and untracked changes
-  - `--base <branch>`: Review changes against a base branch
-  - `--commit <sha>`: Review changes introduced by a specific commit
-  - `--title <title>`: Optional title for review summary
-- **MCP Tool**: New `review` tool with all parameters exposed
-
-### 2. Sandbox Mode (v0.75.0+)
-- **CLI Flag**: `--sandbox <mode>`
-- **Modes**:
-  - `read-only`: No file writes allowed
-  - `workspace-write`: Writes only in workspace directory
-  - `danger-full-access`: Full system access (dangerous)
-- **MCP Parameter**: `sandbox` parameter in codex tool
-
-### 3. Full-Auto Mode (v0.75.0+)
-- **CLI Flag**: `--full-auto`
-- **Description**: Sandboxed automatic execution without approval prompts
-- **Equivalent to**: `-a on-request --sandbox workspace-write`
-- **MCP Parameter**: `fullAuto` boolean parameter
-
-### 4. Working Directory (v0.75.0+)
-- **CLI Flag**: `-C <dir>`
-- **Description**: Set working directory for the agent (global option)
-- **MCP Parameter**: `workingDirectory` parameter in codex tool and review tool
-
-### 5. Model Selection
-- **Default Model**: `gpt-5.2-codex` (optimal for agentic coding tasks)
-- **CLI Flag**: `--model <model-name>`
-- **Supported Models**:
-  - `gpt-5.2-codex` (default, specialized for agentic coding)
-  - `gpt-5.1-codex` (previous coding model)
-  - `gpt-5.1-codex-max` (enhanced coding)
-  - `gpt-5-codex` (base GPT-5 coding)
-  - `gpt-4o` (fast multimodal)
-  - `gpt-4` (advanced reasoning)
-  - `o3` (OpenAI reasoning model)
-  - `o4-mini` (compact reasoning model)
-- **Usage**: Model parameter available in `exec`, `resume`, and `review` modes (use `-c model="..."` for review/resume)
-
-### 6. Reasoning Effort Control
-- **CLI Flag**: `-c model_reasoning_effort="<level>"`
-- **Levels**: `minimal`, `low`, `medium`, `high`
-- **MCP Parameter**: `reasoningEffort` parameter in codex tool
-- **Note**: The standalone `--reasoning-effort` flag was removed in v0.50.0, now uses quoted config values for consistency
-
-### 7. Native Resume Functionality
-- **Command**: `codex exec resume <conversation-id>`
-- **Automatic ID Extraction**: Server extracts conversation IDs from CLI output (supports both "session id" and "conversation id" formats)
-- **Regex Pattern**: `/(conversation|session)\s*id\s*:\s*([a-zA-Z0-9-]+)/i`
-- **Fallback Strategy**: Manual context building when resume unavailable
-- **Session Integration**: Seamless integration with session management
-
-### 8. Thread ID Metadata (v0.87.0+)
-- **Output**: Codex CLI emits `threadId` in MCP responses
-- **Server Behavior**: This MCP server surfaces `threadId` in tool response metadata and content element metadata when present. `structuredContent` is only emitted when `STRUCTURED_CONTENT_ENABLED` is truthy and is **disabled by default**.
-- **Regex Pattern**: `/thread\s*id\s*:\s*([a-zA-Z0-9_-]+)/i`
-- **Structured Output**: The codex tool advertises an `outputSchema` for `structuredContent` (currently `threadId`) when enabled.
-
-## Features Not Yet Supported
-
-The following Codex CLI features are not currently exposed through the MCP server:
-
-| Feature | CLI Flag | Notes |
-|---------|----------|-------|
-| Image Attachments | `-i, --image` | Attach images to prompts |
-| OSS/Local Models | `--oss`, `--local-provider` | LMStudio/Ollama support |
-| Config Profiles | `-p, --profile` | Named configuration profiles |
-| Approval Policy | `-a, --ask-for-approval` | Fine-grained approval control |
-| Additional Dirs | `--add-dir` | Extra writable directories |
-| JSON Output | `--json` | JSONL event stream output |
-| Output Schema | `--output-schema` | Structured JSON output |
-| Output File | `-o, --output-last-message` | Write response to file |
-
-These features may be added in future versions based on user demand.
-
-## Web Search Implementation
-
-**Note:** This MCP server provides web search capability through the dedicated `websearch` tool. The tool uses `codex --search exec` to enable Codex's native web_search tool, providing:
-
-- Customizable result count (1-50 results)
-- Search depth control (basic/full)
-- Streaming progress updates
-- Integration with Codex's search-enabled models
-
-See the Tools section in README.md for usage examples.
-
-## MCP Callback URI (v0.81.0+)
-Codex CLI added static MCP callback URI support. This MCP server forwards the callback URI via environment variable when provided.
-
-- **Env Var**: `CODEX_MCP_CALLBACK_URI`
-- **MCP Parameter**: `callbackUri` (takes precedence over the env var)
-
-## Implementation Details
-
-### Command Construction (v0.75.0+)
-
-**IMPORTANT**: All `exec` options (`--model`, `-c`, `--skip-git-repo-check`, `-C`, `--sandbox`, `--full-auto`) must come BEFORE subcommands (`resume`).
-
-```typescript
-// Basic execution (v0.75.0+)
-['exec', '--model', selectedModel, '--skip-git-repo-check', prompt]
-
-// Execution with new parameters (v0.75.0+)
-['exec', '--model', selectedModel, '--sandbox', 'workspace-write', '--full-auto', '-C', workingDir, '--skip-git-repo-check', prompt]
-
-// Resume mode (v0.75.0+) - All exec options BEFORE 'resume' subcommand
-['exec', '--skip-git-repo-check', '-c', 'model="modelName"', '-c', 'model_reasoning_effort="high"', 'resume', conversationId, prompt]
-
-// Code review (v0.75.0+)
-['-C', workingDir, 'review', '-c', 'model="modelName"', '--uncommitted', '--base', 'main', prompt]
-
-// Code review with model config before subcommand (also accepted)
-['-C', workingDir, '-c', 'model="modelName"', 'review', '--uncommitted', '--base', 'main', prompt]
+```bash
+codex exec resume --help
 ```
 
-**Important: Resume Mode Limitations**
-
-The `codex exec resume` subcommand has a **limited set of flags** compared to `codex exec`:
-- ✅ `-c, --config` - Configuration overrides (use for model selection)
-- ✅ `--enable/--disable` - Feature toggles
-- ❌ `--model` - Not available (use `-c model="..."` instead)
-- ❌ `--sandbox` - Not available in resume mode
-- ❌ `--full-auto` - Not available in resume mode
-- ❌ `-C` - Not available in resume mode
-- ⚠️ `--skip-git-repo-check` - Must be placed on `exec` command BEFORE `resume` subcommand
+## Command Shapes Used by the Server
 
-**Important: Review Mode Limitations**
+New execution:
 
-The `codex review` subcommand also has limited flags:
-- ✅ `-c, --config` - Configuration overrides (use for model selection)
-- ✅ `--uncommitted`, `--base`, `--commit`, `--title` - Review-specific flags
-- ✅ `--enable/--disable` - Feature toggles
-- ❌ `--model` - Not available (use `-c model="..."` instead)
-- ❌ `--sandbox` - Not available in review mode
-- ❌ `--full-auto` - Not available in review mode
-- ✅ `-C` - Global option before `review`
-- ⚠️ `-c` is accepted by `codex review` and can also be passed before `review` as a global option
-
-**Key Changes in v0.75.0:**
-- Added: `codex review` subcommand for code reviews
-- Added: `--sandbox` flag for sandbox modes (exec only)
-- Added: `--full-auto` flag for automatic execution (exec only)
-- Changed: `codex resume` moved to `codex exec resume`
-- Note: Resume subcommand and review command have limited flag support
-
-**Key Changes in v0.50.0:**
-- Added: `--skip-git-repo-check` flag (exec only)
-- Changed: `--reasoning-effort` to `-c model_reasoning_effort=<level>`
-
-### Conversation ID Extraction
-```typescript
-const conversationIdMatch = result.stderr?.match(/conversation\s*id\s*:\s*([a-zA-Z0-9-]+)/i);
-if (conversationIdMatch) {
-  sessionStorage.setCodexConversationId(sessionId, conversationIdMatch[1]);
-}
+```text
+codex exec --model <model> [flags...] --skip-git-repo-check <prompt>
 ```
 
-### Error Handling Enhancements
-- **Authentication Errors**: Clear messaging for login requirement
-- **Model Validation**: Graceful handling of invalid model names
-- **Network Issues**: Proper error propagation and user feedback
-- **CLI Availability**: Detection of missing Codex CLI installation
-
-## Migration Guide
-
-### For Existing Users (Upgrading to v0.75.0+)
-1. **Check Current Version**:
-   ```bash
-   codex --version
-   ```
+Resume execution:
 
-2. **Update Codex CLI** (if below v0.75.0):
-   ```bash
-   npm update -g @openai/codex
-   # or
-   brew upgrade codex
-   ```
-
-3. **Verify Version** (must be v0.75.0 or later):
-   ```bash
-   codex --version  # Should show v0.75.0 or higher
-   ```
-
-4. **Test New Features**:
-   ```bash
-   # Test code review
-   codex review --uncommitted
+```text
+codex exec --skip-git-repo-check -c model="<model>" [-c model_reasoning_effort="..."] [flags...] resume <conversation-id> <prompt>
+```
 
-   # Test sandbox mode
-   codex exec --sandbox workspace-write --skip-git-repo-check "list files"
-   ```
+Review:
 
-### For New Users
-1. **Install Codex CLI** (v0.75.0+):
-   ```bash
-   npm install -g @openai/codex
-   # or
-   brew install codex
-   ```
+```text
+codex [-C <dir>] -c model="<model>" review [review flags...]
+```
 
-2. **Verify Version**:
-   ```bash
-   codex --version  # Must be v0.75.0 or later
-   ```
+Web search:
 
-3. **Authenticate**:
-   ```bash
-   codex login --api-key "your-openai-api-key"
-   ```
+```text
+codex --search exec --skip-git-repo-check <search prompt>
+```
 
-4. **Configure (Optional)**:
-   ```bash
-   # Edit ~/.codex/config.toml to set preferences
-   # Example:
-   # model = "gpt-5.2-codex"
-   # model_reasoning_effort = "medium"
-   ```
+## Flags the Server Exposes
 
-5. **Test Setup**:
-   ```bash
-   codex exec --skip-git-repo-check "console.log('Hello, Codex!')"
-   ```
+### `codex`
 
-## Performance Optimizations
+- `--model`
+- `-c model_reasoning_effort="..."`
+- `--sandbox <mode>` for new executions only
+- `--full-auto`
+- `--dangerously-bypass-approvals-and-sandbox`
+- `-C <dir>` for new executions only
+- `--skip-git-repo-check`
 
-### Smart Model Selection
-- **Default to gpt-5.2-codex**: Optimal for agentic coding without configuration
-- **Context-Aware Suggestions**: Better model recommendations based on task type
-- **Consistent Experience**: Same model across session interactions
+### `review`
 
-### Efficient Context Management
-- **Native Resume Priority**: Use Codex's built-in conversation continuity
-- **Fallback Context**: Only when native resume unavailable
-- **Token Optimization**: Minimal context overhead for better performance
+- `-c model="..."`
+- `-C <dir>`
+- `--uncommitted`
+- `--base <ref>`
+- `--commit <sha>`
+- `--title <text>`
 
-### Error Recovery
-- **Graceful Degradation**: Continue operation despite CLI issues
-- **Automatic Retry**: For transient network issues
-- **Clear Error Messages**: Actionable feedback for user troubleshooting
+### `websearch`
 
-## Testing Strategy
+- `--search`
+- `exec`
+- `--skip-git-repo-check`
 
-### Integration Testing
-- **CLI Command Validation**: Verify correct parameter passing
-- **Conversation ID Extraction**: Test various output formats
-- **Error Scenario Handling**: Comprehensive failure mode coverage
+## Resume Notes
 
-### Edge Case Coverage
-- **Malformed CLI Output**: Handle unexpected response formats
-- **Network Interruptions**: Graceful handling of connectivity issues
-- **Model Availability**: Handle model deprecation or unavailability
+- `sandbox` is not applied on resumed sessions.
+- `workingDirectory` is not applied on resumed sessions.
+- This server now forwards `fullAuto` and `bypassApprovals` on resume when the
+  caller asks for them.
+- The server still uses `-c model="..."` for resume mode for consistency,
+  even though newer Codex CLI builds also expose `--model`.
 
-## Best Practices
+## Timeout and Abort
 
-### For Developers
-- **Always specify model explicitly** when behavior consistency is critical
-- **Use appropriate reasoning effort** based on task complexity
-- **Implement proper error handling** for CLI interactions
-- **Monitor session lifecycle** to prevent memory leaks
+- All `tools/call` requests are serialized.
+- Global timeout: `CODEX_TOOL_TIMEOUT_MS`, default `120000`
+- `codex` also supports per-request `timeoutMs`
+- `codex`, `review`, `websearch`, and `help` now all honor the shared abort
+  path when they time out
 
-### For Users
-- **Start with default settings** for optimal experience
-- **Use sessions for complex tasks** requiring multiple interactions
-- **Choose reasoning effort wisely** to balance speed and quality
-- **Keep CLI updated** for latest features and bug fixes
+## Environment Variables Used by This Server
 
-## Troubleshooting
+- `CODEX_DEFAULT_MODEL`
+- `CODEX_MCP_CALLBACK_URI`
+- `CODEX_TOOL_TIMEOUT_MS`
+- `STRUCTURED_CONTENT_ENABLED`
+- `CODEX_MCP_DEBUG_STARTUP`
+- `CODEX_MCP_DEBUG_COMMANDS`
 
-### Common Issues
-1. **Authentication Failures**
-   - Solution: Run `codex login --api-key "your-key"`
-   - Verify: Check `CODEX_HOME/auth.json` exists
+## Current Default Model and Reasoning Values
 
-2. **Model Not Available**
-   - Solution: Use default `gpt-5.2-codex` or try alternative models
-   - Check: Codex CLI documentation for available models
+- Default model: `gpt-5.4`
+- Documented reasoning values: `none`, `minimal`, `low`, `medium`, `high`,
+  `xhigh`
 
-3. **Resume Functionality Not Working**
-   - Solution: System falls back to manual context building
-   - Check: Conversation ID extraction in server logs
+## Current Gaps
 
-4. **Performance Issues**
-   - Solution: Lower reasoning effort or use faster models
-   - Monitor: Response times and adjust parameters accordingly
\ No newline at end of file
+- `timeoutMs` is only exposed on the `codex` tool, not on `review`, `help`, or
+  `websearch`
+- Sessions are process-local and in-memory only
diff --git a/docs/plan.md b/docs/plan.md
index bbe8a2c..6acb9c0 100644
--- a/docs/plan.md
+++ b/docs/plan.md
@@ -1,5 +1,8 @@
 # Codex MCP Server Implementation Plan
 
+Historical planning document. It does not describe the current repository
+layout or current public tool surface.
+
 ## Overview
 Create an MCP server wrapper for OpenAI Codex CLI that enables single-command integration with Claude, similar to gemini-mcp-tool.
 
@@ -128,4 +131,4 @@ codex exec "Add error handling to this function"
 Enable single-command Claude integration:
 ```bash
 claude mcp add codex-cli -- npx -y codex-mcp-server
-```
\ No newline at end of file
+```
diff --git a/docs/plans/2026-04-25-timeout-followups.md b/docs/plans/2026-04-25-timeout-followups.md
new file mode 100644
index 0000000..cb45574
--- /dev/null
+++ b/docs/plans/2026-04-25-timeout-followups.md
@@ -0,0 +1,142 @@
+# Timeout Follow-Ups Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Status:** Completed on 2026-04-25
+
+**Outcome:**
+- Added validated `timeoutMs` to the `codex` tool schema.
+- Added per-call timeout override behavior on top of `CODEX_TOOL_TIMEOUT_MS`.
+- Exposed `timeoutMs` in the MCP tool definition and public docs.
+- Verified repo-wide with lint, format, build, full Jest suite, and a Claude stub probe.
+
+**Goal:** Add safer timeout controls for long Codex tasks without regressing the fixed queue and child-abort behavior.
+
+**Architecture:** Keep the existing queue and abort model intact. Add the smallest possible caller-controlled timeout override on the `codex` tool, validate it in the schema layer, and cover it with focused lifecycle tests plus one docs sweep for drifted defaults.
+
+**Tech Stack:** TypeScript, Jest, ts-jest, MCP SDK, Node child processes
+
+---
+
+### Task 1: Add a validated per-call timeout argument
+
+**Files:**
+- Modify: `src/types.ts`
+- Modify: `src/tools/handlers.ts`
+- Test: `src/__tests__/runtime-config.test.ts`
+
+**Step 1: Write the failing test**
+
+Add a schema-level test that accepts a positive `timeoutMs` and rejects `0`,
+negative values, and non-integers.
+
+**Step 2: Run test to verify it fails**
+
+Run:
+
+```bash
+npm test -- --runInBand src/__tests__/runtime-config.test.ts --runTestsByPath
+```
+
+Expected: FAIL because `timeoutMs` is not part of the codex schema yet.
+
+**Step 3: Write minimal implementation**
+
+- add optional `timeoutMs` to `CodexToolSchema`
+- thread it into the server/tool execution path without changing current
+  defaults
+
+**Step 4: Run test to verify it passes**
+
+Run the same focused test command and confirm PASS.
+
+### Task 2: Prove override behavior in lifecycle tests
+
+**Files:**
+- Modify: `src/__tests__/mcp-lifecycle.test.ts`
+- Modify: `src/server.ts`
+
+**Step 1: Write the failing test**
+
+Add a focused lifecycle test where the default timeout is high, but a single
+`tools/call` request passes a low `timeoutMs` override and must time out without
+blocking the next request.
+
+**Step 2: Run test to verify it fails**
+
+Run:
+
+```bash
+npm test -- --runInBand src/__tests__/mcp-lifecycle.test.ts -t "uses per-call timeout override" --runTestsByPath
+```
+
+Expected: FAIL because the override is ignored.
+
+**Step 3: Write minimal implementation**
+
+- prefer request `timeoutMs` over `CODEX_TOOL_TIMEOUT_MS`
+- keep the existing abort-and-wait queue semantics unchanged
+
+**Step 4: Run test to verify it passes**
+
+Run the same focused test command and confirm PASS.
+
+### Task 3: Sweep docs for timeout and default-model drift
+
+**Files:**
+- Modify: `README.md`
+- Modify: `docs/codex-cli-integration.md`
+- Modify: `docs/session-management.md`
+
+**Step 1: Write the failing check**
+
+Search for stale default model or timeout documentation:
+
+```bash
+rg -n "gpt-5\\.2-codex|120000|timeoutMs" README.md docs
+```
+
+**Step 2: Verify drift exists**
+
+Expected: at least one stale `gpt-5.2-codex` reference and no mention of
+per-call timeout override.
+
+**Step 3: Write minimal documentation updates**
+
+- align docs with the current default model
+- document `CODEX_TOOL_TIMEOUT_MS`
+- document the per-call timeout override once implemented
+
+**Step 4: Verify docs**
+
+Run the same `rg` command and confirm stale references are gone or explicitly
+intentional.
+
+### Task 4: Final verification
+
+**Files:**
+- Test: `src/__tests__/runtime-config.test.ts`
+- Test: `src/__tests__/mcp-lifecycle.test.ts`
+- Verify: repo-wide
+
+**Step 1: Run focused tests**
+
+```bash
+npm test -- --runInBand src/__tests__/runtime-config.test.ts src/__tests__/mcp-lifecycle.test.ts --runTestsByPath
+```
+
+**Step 2: Run build**
+
+```bash
+npm run build
+```
+
+**Step 3: Run full suite**
+
+```bash
+npm test -- --runInBand
+```
+
+**Step 4: Commit**
+
+Commit only after all verification is green.
diff --git a/docs/session-management.md b/docs/session-management.md
index b899aee..7e73565 100644
--- a/docs/session-management.md
+++ b/docs/session-management.md
@@ -1,143 +1,86 @@
-# Session Management Implementation Guide
-
-## Overview
-The Codex MCP Server provides advanced session management with native Codex CLI v0.50.0+ integration, enabling persistent conversational context and sophisticated AI coding assistance.
-
-Sessions are created on first use when a sessionId is provided. If no sessionId
-is supplied, this request does not create a session (so it won't appear in
-listSessions).
-
-## Architecture
-
-### Session Storage
-- **In-memory Map-based storage** with automatic cleanup
-- **UUID-based session IDs** for unique identification
-- **TTL management** - 24 hour automatic session expiration
-- **Session limit enforcement** - maximum 100 concurrent sessions
-
-### Enhanced Session Data Structure
-```typescript
-interface SessionData {
-  id: string;
-  createdAt: Date;
-  lastAccessedAt: Date;
-  turns: ConversationTurn[];
-  codexConversationId?: string; // Native Codex conversation ID
-}
+# Session Management
 
-interface ConversationTurn {
-  prompt: string;
-  response: string;
-  timestamp: Date;
-}
-```
+This server provides lightweight in-memory session management for the `codex`
+tool.
 
-### Native Codex Integration
-- **Automatic conversation ID extraction** from Codex CLI output (supports both "session id" and "conversation id" formats)
-- **Resume functionality** using `codex exec resume <conversation-id>`
-- **Fallback context building** when native resume unavailable
-- **Model consistency** across session interactions
-
-### Tool Enhancements
-
-#### Enhanced Codex Tool
-- **sessionId** (optional): Session ID for conversational context
-- **resetSession** (optional): Reset session history before processing
-- **model** (optional): Model selection (defaults to `gpt-5.2-codex`)
-- **reasoningEffort** (optional): Control reasoning depth (low/medium/high)
-- **Smart context building**: Uses native resume or fallback context
-- **Robust error handling**: Graceful degradation for various failure modes
-
-#### ListSessions Tool
-- **Session enumeration**: Returns all active session IDs with comprehensive metadata
-- **Session introspection**: Creation time, last access, turn count, conversation ID
-- **Management interface**: Enables session lifecycle monitoring
-
-## Implementation Status ✅
-
-### ✅ Completed Features
-1. **Session Storage System**
-   - InMemorySessionStorage with TTL and cleanup
-   - Defensive programming against data corruption
-   - Conversation ID tracking and management
-
-2. **Enhanced Codex Tool Handler**
-   - Native resume functionality with fallback
-   - GPT-5.2-Codex as intelligent default model
-   - Model and reasoning effort parameter support
-   - Comprehensive error handling and validation
-
-3. **ListSessions Tool**
-   - Complete session metadata exposure
-   - JSON-formatted session information
-   - Real-time session status tracking
-
-4. **Robust Testing Suite**
-   - 54 comprehensive tests covering all functionality
-   - Edge case handling and error scenario validation
-   - Integration testing with Codex CLI interactions
-
-## Advanced Benefits
-- **Native Codex Resume**: Optimal conversation continuity using Codex CLI's built-in resume feature
-- **Intelligent Defaults**: GPT-5.2-Codex model selection for superior agentic coding assistance
-- **Production-Ready**: Comprehensive error handling, data validation, and graceful degradation
-- **Enterprise-Scale**: Session management suitable for professional development workflows
-- **Flexible Configuration**: Per-request model and reasoning effort customization
-
-## Usage Patterns
-
-### Basic Session Usage
-```bash
-# Explicit session management (creates the session on first use)
-codex --sessionId "auth-review" "Continue analysis"
-codex --sessionId "auth-review" --resetSession true "Start fresh review"
-```
+## What a Session Is
+
+- A session is keyed by a caller-supplied `sessionId`.
+- Valid IDs are `1` to `256` characters and may contain only letters, numbers,
+  `_`, and `-`.
+- Sessions exist only inside the current server process.
+- If the server exits or is restarted, all sessions are lost.
+
+## What the Server Stores
+
+For each session, the server keeps:
+
+- `id`
+- `createdAt`
+- `lastAccessedAt`
+- `turns`
+- `codexConversationId` when Codex CLI emits one
+
+Limits:
+
+- TTL: 24 hours of inactivity
+- Maximum active sessions: 100
 
-### Advanced Configuration
-```bash
-# Model and reasoning control
-codex --model "gpt-4" --reasoningEffort "high" "Complex architectural analysis"
+## How Resume Works
 
-# Session with custom parameters
-codex --sessionId "deep-dive" --model "gpt-4" --reasoningEffort "high" "Advanced optimization review"
+1. The first `codex` call with a `sessionId` creates the session.
+2. If Codex CLI emits a conversation/session ID, the server stores it.
+3. Later calls with the same `sessionId` use `codex exec resume <conversation-id>`.
+4. If no native Codex conversation ID exists yet, the server falls back to a
+   short prompt-context reconstruction using the most recent turns.
 
-# Session management
-listSessions  # View all active sessions
+## Resume Constraints
+
+- `sandbox` is not applied on resumed sessions.
+- `workingDirectory` is not applied on resumed sessions.
+- This server forwards `fullAuto` and `bypassApprovals` on resumed sessions.
+  Whether those flags are accepted depends on the installed Codex CLI version.
+
+## MCP Examples
+
+Create or continue a session:
+
+```json
+{
+  "name": "codex",
+  "arguments": {
+    "prompt": "Audit this module for race conditions",
+    "sessionId": "race-audit"
+  }
+}
 ```
 
-## Technical Architecture
-
-### Command Flow
-```mermaid
-graph TD
-    A[User Request] --> B{Session ID provided?}
-    B -->|Yes| C[Ensure Session Exists]
-    C --> D{Reset Session?}
-    B -->|No| M[Execute with Default Model]
-    D -->|Yes| E[Clear Session History]
-    D -->|No| F{Codex Conversation ID exists?}
-    E --> G[Execute with Default Model]
-    F -->|Yes| H[Use Codex Resume]
-    F -->|No| I[Build Enhanced Context]
-    H --> J[Execute with Parameters]
-    I --> J
-    G --> K[Extract Conversation ID]
-    J --> L[Save Turn to Session]
-    K --> L
-    M --> N[Return Response]
-    L --> N
+Reset a session:
+
+```json
+{
+  "name": "codex",
+  "arguments": {
+    "prompt": "Start over from scratch",
+    "sessionId": "race-audit",
+    "resetSession": true
+  }
+}
 ```
 
-### Error Handling Strategy
-- **Graceful Degradation**: System continues operation even with corrupted session data
-- **Defensive Programming**: Validates array structures and handles null/undefined gracefully
-- **Comprehensive Logging**: Error context preserved for debugging and monitoring
-- **Fallback Mechanisms**: Manual context building when native resume fails
+List current sessions:
+
+```json
+{
+  "name": "listSessions",
+  "arguments": {}
+}
+```
 
-### Performance Considerations
-- **Memory Management**: In-memory, per-process sessions with automatic cleanup
-  of expired sessions (24hr TTL)
-- **Session Limits**: Maximum 100 concurrent sessions to prevent memory exhaustion
-- **Context Optimization**: Only recent turns (last 2) used for manual context building
-- **Efficient Storage**: Minimal session metadata for optimal memory usage      
+## Operational Notes
 
+- `listSessions` only shows sessions for the current running server.
+- A caller can choose meaningful IDs like `refactor-auth-flow`; IDs are not
+  UUID-only.
+- Native Codex resume is preferred when available because it preserves Codex’s
+  own conversation state, not just the server’s short turn history.
diff --git a/src/__tests__/command-logging.test.ts b/src/__tests__/command-logging.test.ts
new file mode 100644
index 0000000..630b068
--- /dev/null
+++ b/src/__tests__/command-logging.test.ts
@@ -0,0 +1,93 @@
+// Mock chalk to avoid ESM issues in Jest
+jest.mock('chalk', () => ({
+  __esModule: true,
+  default: {
+    blue: (text: string) => text,
+    yellow: (text: string) => text,
+    green: (text: string) => text,
+    red: (text: string) => text,
+  },
+}));
+
+jest.mock('child_process', () => ({
+  spawn: jest.fn(),
+}));
+
+import { EventEmitter } from 'events';
+import { Buffer } from 'node:buffer';
+
+import { executeCommand } from '../utils/command.js';
+import { COMMAND_LOG_ENV_VAR } from '../runtime-config.js';
+import { spawn } from 'child_process';
+
+type MockedSpawn = jest.MockedFunction<typeof spawn>;
+type MockChild = ReturnType<typeof spawn> &
+  EventEmitter & {
+    stderr: EventEmitter;
+    stdout: EventEmitter;
+  };
+
+function createMockChild() {
+  const child = new EventEmitter() as unknown as MockChild;
+
+  Object.assign(child, {
+    stdout: new EventEmitter(),
+    stderr: new EventEmitter(),
+    stdin: { end: jest.fn() } as unknown as ReturnType<typeof spawn>['stdin'],
+    kill: jest.fn(),
+  });
+  Object.defineProperty(child, 'pid', { value: 12345 });
+
+  return child;
+}
+
+describe('command logging', () => {
+  const mockedSpawn = spawn as MockedSpawn;
+
+  beforeEach(() => {
+    jest.restoreAllMocks();
+    mockedSpawn.mockReset();
+    Reflect.deleteProperty(process.env, COMMAND_LOG_ENV_VAR);
+  });
+
+  test('does not log command execution details by default', async () => {
+    const child = createMockChild();
+    mockedSpawn.mockReturnValue(child);
+    const errorSpy = jest.spyOn(console, 'error').mockImplementation(() => {});
+
+    const commandPromise = executeCommand('codex', ['exec', 'test prompt']);
+
+    child.stderr.emit('data', Buffer.from('stderr output'));
+    child.emit('close', 0);
+
+    await expect(commandPromise).resolves.toEqual({
+      stdout: '',
+      stderr: 'stderr output',
+    });
+    expect(errorSpy).not.toHaveBeenCalled();
+  });
+
+  test('logs command execution details when explicitly enabled', async () => {
+    process.env[COMMAND_LOG_ENV_VAR] = '1';
+
+    const child = createMockChild();
+    mockedSpawn.mockReturnValue(child);
+    const errorSpy = jest.spyOn(console, 'error').mockImplementation(() => {});
+
+    const commandPromise = executeCommand('codex', ['exec', 'test prompt']);
+
+    child.stderr.emit('data', Buffer.from('stderr output'));
+    child.emit('close', 0);
+
+    await expect(commandPromise).resolves.toEqual({
+      stdout: '',
+      stderr: 'stderr output',
+    });
+    expect(errorSpy).toHaveBeenCalledWith(
+      'Executing:',
+      'codex',
+      'exec test prompt'
+    );
+    expect(errorSpy).toHaveBeenCalledWith('Command stderr:', 'stderr output');
+  });
+});
diff --git a/src/__tests__/context-building.test.ts b/src/__tests__/context-building.test.ts
index 74d81ef..b0be669 100644
--- a/src/__tests__/context-building.test.ts
+++ b/src/__tests__/context-building.test.ts
@@ -1,18 +1,17 @@
-import { CodexToolHandler } from '../tools/handlers.js';
+import type { CodexToolHandler as CodexToolHandlerType } from '../tools/handlers.js';
 import { InMemorySessionStorage } from '../session/storage.js';
-import { executeCommand } from '../utils/command.js';
 
 // Mock the command execution
 jest.mock('../utils/command.js', () => ({
   executeCommand: jest.fn(),
 }));
 
-const mockedExecuteCommand = executeCommand as jest.MockedFunction<
-  typeof executeCommand
->;
-
 describe('Context Building Analysis', () => {
-  let handler: CodexToolHandler;
+  let CodexToolHandler: typeof import('../tools/handlers.js').CodexToolHandler;
+  let handler: CodexToolHandlerType;
+  let mockedExecuteCommand: jest.MockedFunction<
+    typeof import('../utils/command.js').executeCommand
+  >;
   let sessionStorage: InMemorySessionStorage;
   let originalStructuredContent: string | undefined;
 
@@ -28,7 +27,14 @@ describe('Context Building Analysis', () => {
     }
   });
 
-  beforeEach(() => {
+  beforeEach(async () => {
+    jest.resetModules();
+    process.env.STRUCTURED_CONTENT_ENABLED = '1';
+    ({ CodexToolHandler } = await import('../tools/handlers.js'));
+    const commandModule = await import('../utils/command.js');
+    mockedExecuteCommand = commandModule.executeCommand as jest.MockedFunction<
+      typeof commandModule.executeCommand
+    >;
     sessionStorage = new InMemorySessionStorage();
     handler = new CodexToolHandler(sessionStorage);
     mockedExecuteCommand.mockClear();
@@ -36,7 +42,6 @@ describe('Context Building Analysis', () => {
       stdout: 'Test response',
       stderr: '',
     });
-    process.env.STRUCTURED_CONTENT_ENABLED = '1';
   });
 
   test('should build enhanced prompt correctly', async () => {
@@ -61,7 +66,7 @@ describe('Context Building Analysis', () => {
 
     // Check what prompt was sent to Codex - should be enhanced but not conversational
     const call = mockedExecuteCommand.mock.calls[0];
-    const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
+    const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.4, --skip-git-repo-check, prompt
     expect(sentPrompt).toContain('Previous code context:');
     expect(sentPrompt).toContain('Task: Make it more efficient');
     expect(sentPrompt).not.toContain('Previous: What is recursion?'); // No conversational format
diff --git a/src/__tests__/default-model.test.ts b/src/__tests__/default-model.test.ts
index 4329e5f..fa4fec3 100644
--- a/src/__tests__/default-model.test.ts
+++ b/src/__tests__/default-model.test.ts
@@ -1,18 +1,17 @@
-import { CodexToolHandler } from '../tools/handlers.js';
+import type { CodexToolHandler as CodexToolHandlerType } from '../tools/handlers.js';
 import { InMemorySessionStorage } from '../session/storage.js';
-import { executeCommand } from '../utils/command.js';
 
 // Mock the command execution
 jest.mock('../utils/command.js', () => ({
   executeCommand: jest.fn(),
 }));
 
-const mockedExecuteCommand = executeCommand as jest.MockedFunction<
-  typeof executeCommand
->;
-
 describe('Default Model Configuration', () => {
-  let handler: CodexToolHandler;
+  let CodexToolHandler: typeof import('../tools/handlers.js').CodexToolHandler;
+  let handler: CodexToolHandlerType;
+  let mockedExecuteCommand: jest.MockedFunction<
+    typeof import('../utils/command.js').executeCommand
+  >;
   let sessionStorage: InMemorySessionStorage;
   let originalStructuredContent: string | undefined;
 
@@ -28,7 +27,14 @@ describe('Default Model Configuration', () => {
     }
   });
 
-  beforeEach(() => {
+  beforeEach(async () => {
+    jest.resetModules();
+    process.env.STRUCTURED_CONTENT_ENABLED = '1';
+    ({ CodexToolHandler } = await import('../tools/handlers.js'));
+    const commandModule = await import('../utils/command.js');
+    mockedExecuteCommand = commandModule.executeCommand as jest.MockedFunction<
+      typeof commandModule.executeCommand
+    >;
     sessionStorage = new InMemorySessionStorage();
     handler = new CodexToolHandler(sessionStorage);
     mockedExecuteCommand.mockClear();
@@ -36,22 +42,15 @@ describe('Default Model Configuration', () => {
       stdout: 'Test response',
       stderr: '',
     });
-    process.env.STRUCTURED_CONTENT_ENABLED = '1';
     delete process.env.CODEX_MCP_CALLBACK_URI;
   });
 
-  test('should use gpt-5.3-codex as default model when no model specified', async () => {
+  test('should use gpt-5.4 as default model when no model specified', async () => {
     await handler.execute({ prompt: 'Test prompt' });
 
     expect(mockedExecuteCommand).toHaveBeenCalledWith(
       'codex',
-      [
-        'exec',
-        '--model',
-        'gpt-5.3-codex',
-        '--skip-git-repo-check',
-        'Test prompt',
-      ],
+      ['exec', '--model', 'gpt-5.4', '--skip-git-repo-check', 'Test prompt'],
       expect.any(Object)
     );
   });
@@ -59,8 +58,8 @@ describe('Default Model Configuration', () => {
   test('should include default model in response metadata', async () => {
     const result = await handler.execute({ prompt: 'Test prompt' });
 
-    expect(result.content[0]._meta?.model).toBe('gpt-5.3-codex');
-    expect(result.structuredContent?.model).toBe('gpt-5.3-codex');
+    expect(result.content[0]._meta?.model).toBe('gpt-5.4');
+    expect(result.structuredContent?.model).toBe('gpt-5.4');
     expect(result._meta?.callbackUri).toBeUndefined();
   });
 
@@ -87,13 +86,7 @@ describe('Default Model Configuration', () => {
 
     expect(mockedExecuteCommand).toHaveBeenCalledWith(
       'codex',
-      [
-        'exec',
-        '--model',
-        'gpt-5.3-codex',
-        '--skip-git-repo-check',
-        'Test prompt',
-      ],
+      ['exec', '--model', 'gpt-5.4', '--skip-git-repo-check', 'Test prompt'],
       expect.any(Object)
     );
   });
@@ -114,7 +107,7 @@ describe('Default Model Configuration', () => {
         'exec',
         '--skip-git-repo-check',
         '-c',
-        'model="gpt-5.3-codex"',
+        'model="gpt-5.4"',
         'resume',
         'existing-conv-id',
         'Resume with default model',
@@ -134,7 +127,7 @@ describe('Default Model Configuration', () => {
       [
         'exec',
         '--model',
-        'gpt-5.3-codex',
+        'gpt-5.4',
         '-c',
         'model_reasoning_effort="high"',
         '--skip-git-repo-check',
diff --git a/src/__tests__/edge-cases.test.ts b/src/__tests__/edge-cases.test.ts
index 2ec8f6e..a511190 100644
--- a/src/__tests__/edge-cases.test.ts
+++ b/src/__tests__/edge-cases.test.ts
@@ -133,7 +133,7 @@ describe('Edge Cases and Integration Issues', () => {
 
     // Should only use recent turns, not crash with too much context
     const call = mockedExecuteCommand.mock.calls[0];
-    const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
+    const prompt = call?.[1]?.[4]; // After exec, --model, gpt-5.4, --skip-git-repo-check, prompt
     expect(typeof prompt).toBe('string');
     if (prompt) {
       expect(prompt.length).toBeLessThan(5000); // Reasonable limit
diff --git a/src/__tests__/error-scenarios.test.ts b/src/__tests__/error-scenarios.test.ts
index ba4f57b..b656c8c 100644
--- a/src/__tests__/error-scenarios.test.ts
+++ b/src/__tests__/error-scenarios.test.ts
@@ -111,6 +111,30 @@ describe('Error Handling Scenarios', () => {
     expect(mockedExecuteCommand).not.toHaveBeenCalled();
   });
 
+  test('should reject bypassApprovals when sandbox is also set', async () => {
+    await expect(
+      handler.execute({
+        prompt: 'Test prompt',
+        sandbox: 'workspace-write',
+        bypassApprovals: true,
+      })
+    ).rejects.toThrow(ValidationError);
+
+    expect(mockedExecuteCommand).not.toHaveBeenCalled();
+  });
+
+  test('should reject bypassApprovals when fullAuto is also set', async () => {
+    await expect(
+      handler.execute({
+        prompt: 'Test prompt',
+        fullAuto: true,
+        bypassApprovals: true,
+      })
+    ).rejects.toThrow(ValidationError);
+
+    expect(mockedExecuteCommand).not.toHaveBeenCalled();
+  });
+
   test('should handle corrupted session data', async () => {
     const sessionId = sessionStorage.createSession();
 
@@ -157,7 +181,7 @@ describe('Error Handling Scenarios', () => {
     expect(result.content[0].text).toBe('Response');
     expect(mockedExecuteCommand).toHaveBeenCalledWith(
       'codex',
-      ['exec', '--model', 'gpt-5.3-codex', '--skip-git-repo-check', longPrompt],
+      ['exec', '--model', 'gpt-5.4', '--skip-git-repo-check', longPrompt],
       expect.any(Object)
     );
   });
diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts
index 1762942..c362728 100644
--- a/src/__tests__/index.test.ts
+++ b/src/__tests__/index.test.ts
@@ -65,6 +65,24 @@ describe('Codex MCP Server', () => {
       expect(codexTool?.description).toContain('Execute Codex CLI');
     });
 
+    test('codex tool should expose timeout override parameter', () => {
+      const codexTool = toolDefinitions.find(
+        (tool) => tool.name === TOOLS.CODEX
+      );
+      expect(codexTool?.inputSchema.properties).toHaveProperty('timeoutMs');
+    });
+
+    test('codex tool should expose bypass approvals parameter', () => {
+      const codexTool = toolDefinitions.find(
+        (tool) => tool.name === TOOLS.CODEX
+      );
+      const bypassApprovals = codexTool?.inputSchema.properties
+        .bypassApprovals as { type?: string; description?: string } | undefined;
+
+      expect(bypassApprovals?.type).toBe('boolean');
+      expect(bypassApprovals?.description).toContain('externally sandboxed');
+    });
+
     test('ping tool should have optional message parameter', () => {
       const pingTool = toolDefinitions.find((tool) => tool.name === TOOLS.PING);
       expect(pingTool).toBeDefined();
@@ -152,7 +170,7 @@ describe('Codex MCP Server', () => {
       const result = {
         content: [{ type: 'text', text: 'ok', _meta: { threadId: 'th_123' } }],
         structuredContent: { threadId: 'th_123' },
-        _meta: { model: 'gpt-5.3-codex' },
+        _meta: { model: 'gpt-5.4' },
       };
 
       const parsed = CallToolResultSchema.safeParse(result);
diff --git a/src/__tests__/mcp-lifecycle.test.ts b/src/__tests__/mcp-lifecycle.test.ts
new file mode 100644
index 0000000..776fba3
--- /dev/null
+++ b/src/__tests__/mcp-lifecycle.test.ts
@@ -0,0 +1,695 @@
+import { spawn } from 'child_process';
+import { chmodSync, existsSync, mkdtempSync, rmSync, writeFileSync } from 'fs';
+import { tmpdir } from 'os';
+import path from 'path';
+import { promisify } from 'util';
+import { exec } from 'child_process';
+
+const execAsync = promisify(exec);
+
+const JSONRPC_VERSION = '2.0';
+const TEST_TIMEOUT_MS = 10000;
+type ExitSignal = string | null;
+
+async function ensureBuild(distPath: string): Promise<void> {
+  if (existsSync(distPath)) return;
+  await execAsync('npm run build');
+}
+
+function createCodexStub(): string {
+  const stubDir = mkdtempSync(path.join(tmpdir(), 'codex-mcp-life-'));
+  const stubPath = path.join(stubDir, 'codex');
+  const lockDir = path.join(stubDir, 'active.lock');
+  const stubScript = `#!/bin/sh
+LOCK_DIR=${JSON.stringify(lockDir)}
+if ! mkdir "$LOCK_DIR" 2>/dev/null; then
+printf "Not connected\\n" 1>&2
+exit 1
+fi
+cleanup() {
+  rmdir "$LOCK_DIR" 2>/dev/null || true
+}
+trap cleanup EXIT INT TERM
+sleep 0.2
+printf "stub stdout\\n"
+printf "thread id: th_lifecycle_123\\n" 1>&2
+printf "session id: sess_lifecycle_123\\n" 1>&2
+exit 0
+`;
+  writeFileSync(stubPath, stubScript, { mode: 0o755 });
+  chmodSync(stubPath, 0o755);
+  return stubDir;
+}
+
+function createTimeoutCodexStub(): string {
+  const stubDir = mkdtempSync(path.join(tmpdir(), 'codex-mcp-timeout-'));
+  const stubPath = path.join(stubDir, 'codex');
+  const lockDir = path.join(stubDir, 'active.lock');
+  const stubScript = `#!/bin/sh
+LOCK_DIR=${JSON.stringify(lockDir)}
+if ! mkdir "$LOCK_DIR" 2>/dev/null; then
+printf "Not connected\\n" 1>&2
+exit 1
+fi
+cleanup() {
+  rmdir "$LOCK_DIR" 2>/dev/null || true
+}
+trap cleanup EXIT INT TERM
+if printf "%s" "$*" | grep -q "slow-timeout"; then
+  sleep 5
+else
+  sleep 0.01
+fi
+printf "stub stdout\\n"
+printf "thread id: th_timeout_123\\n" 1>&2
+printf "session id: sess_timeout_123\\n" 1>&2
+exit 0
+`;
+  writeFileSync(stubPath, stubScript, { mode: 0o755 });
+  chmodSync(stubPath, 0o755);
+  return stubDir;
+}
+
+describe('MCP server lifecycle', () => {
+  jest.setTimeout(TEST_TIMEOUT_MS);
+
+  let server: ReturnType<typeof spawn> | null = null;
+  let stubDir: string | null = null;
+  let buffer = '';
+  let exitCode: number | null = null;
+  let exitSignal: ExitSignal = null;
+  const pending = new Map<number, (payload: unknown) => void>();
+
+  const sendRequest = (request: Record<string, unknown>) =>
+    new Promise<unknown>((resolve, reject) => {
+      if (!server?.stdin) {
+        reject(new Error('Server stdin not available'));
+        return;
+      }
+
+      const id = request.id as number;
+      const timer = globalThis.setTimeout(() => {
+        pending.delete(id);
+        reject(new Error(`Timed out waiting for response ${id}`));
+      }, TEST_TIMEOUT_MS);
+
+      pending.set(id, (payload) => {
+        globalThis.clearTimeout(timer);
+        resolve(payload);
+      });
+
+      server.stdin.write(`${JSON.stringify(request)}\n`);
+    });
+
+  beforeAll(async () => {
+    const distPath = path.join(process.cwd(), 'dist', 'index.js');
+    await ensureBuild(distPath);
+    stubDir = createCodexStub();
+
+    server = spawn(process.execPath, [distPath], {
+      env: {
+        ...process.env,
+        PATH: `${stubDir}${path.delimiter}${process.env.PATH}`,
+      },
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+
+    server.stdout?.setEncoding('utf8');
+    server.stdout?.on('data', (chunk: string) => {
+      buffer += chunk;
+      let newlineIndex = buffer.indexOf('\n');
+
+      while (newlineIndex >= 0) {
+        const line = buffer.slice(0, newlineIndex).trim();
+        buffer = buffer.slice(newlineIndex + 1);
+
+        if (line) {
+          try {
+            const payload = JSON.parse(line) as {
+              id?: number;
+              result?: unknown;
+            };
+
+            if (typeof payload.id === 'number') {
+              const resolver = pending.get(payload.id);
+              if (resolver) {
+                resolver(payload.result ?? payload);
+                pending.delete(payload.id);
+              }
+            }
+          } catch {
+            // Ignore non-JSON output
+          }
+        }
+
+        newlineIndex = buffer.indexOf('\n');
+      }
+    });
+
+    server.stderr?.on('data', () => {});
+    server.on('exit', (code, signal) => {
+      exitCode = code;
+      exitSignal = signal;
+    });
+  });
+
+  afterAll(async () => {
+    if (server && exitCode === null && exitSignal === null) {
+      server.kill();
+      await new Promise((resolve) => server?.once('exit', resolve));
+    }
+
+    if (stubDir) {
+      rmSync(stubDir, { recursive: true, force: true });
+    }
+  });
+
+  test('stays alive across sequential codex tool calls', async () => {
+    await sendRequest({
+      jsonrpc: JSONRPC_VERSION,
+      id: 1,
+      method: 'initialize',
+      params: {
+        protocolVersion: '2024-11-05',
+        capabilities: {},
+        clientInfo: { name: 'test', version: '1' },
+      },
+    });
+
+    server?.stdin?.write(
+      `${JSON.stringify({
+        jsonrpc: JSONRPC_VERSION,
+        method: 'notifications/initialized',
+        params: {},
+      })}\n`
+    );
+
+    await sendRequest({
+      jsonrpc: JSONRPC_VERSION,
+      id: 2,
+      method: 'tools/list',
+      params: {},
+    });
+
+    const firstResponse = (await sendRequest({
+      jsonrpc: JSONRPC_VERSION,
+      id: 3,
+      method: 'tools/call',
+      params: { name: 'codex', arguments: { prompt: 'echo hello' } },
+    })) as {
+      content: Array<{ text: string }>;
+    };
+
+    expect(firstResponse.content[0]?.text).toBe('stub stdout\n');
+    expect({ exitCode, exitSignal }).toEqual({
+      exitCode: null,
+      exitSignal: null,
+    });
+
+    await new Promise((resolve) => globalThis.setTimeout(resolve, 250));
+    expect({ exitCode, exitSignal }).toEqual({
+      exitCode: null,
+      exitSignal: null,
+    });
+
+    const secondResponse = (await sendRequest({
+      jsonrpc: JSONRPC_VERSION,
+      id: 4,
+      method: 'tools/call',
+      params: { name: 'codex', arguments: { prompt: 'echo world' } },
+    })) as {
+      content: Array<{ text: string }>;
+    };
+
+    expect(secondResponse.content[0]?.text).toBe('stub stdout\n');
+    expect({ exitCode, exitSignal }).toEqual({
+      exitCode: null,
+      exitSignal: null,
+    });
+  });
+
+  test('serializes concurrent tools/call requests', async () => {
+    const firstResponsePromise = sendRequest({
+      jsonrpc: JSONRPC_VERSION,
+      id: 5,
+      method: 'tools/call',
+      params: { name: 'codex', arguments: { prompt: 'echo concurrent one' } },
+    });
+
+    const secondResponsePromise = sendRequest({
+      jsonrpc: JSONRPC_VERSION,
+      id: 6,
+      method: 'tools/call',
+      params: { name: 'codex', arguments: { prompt: 'echo concurrent two' } },
+    });
+
+    const [firstResponse, secondResponse] = (await Promise.all([
+      firstResponsePromise,
+      secondResponsePromise,
+    ])) as Array<{ content: Array<{ text: string }> }>;
+
+    expect(firstResponse.content[0]?.text).toBe('stub stdout\n');
+    expect(secondResponse.content[0]?.text).toBe('stub stdout\n');
+    expect({ exitCode, exitSignal }).toEqual({
+      exitCode: null,
+      exitSignal: null,
+    });
+  });
+
+  test('kills child process on timeout', async () => {
+    const distPath = path.join(process.cwd(), 'dist', 'index.js');
+    await ensureBuild(distPath);
+
+    const timeoutStubDir = createTimeoutCodexStub();
+    let timeoutServer: ReturnType<typeof spawn> | null = null;
+    let timeoutBuffer = '';
+    let timeoutExitCode: number | null = null;
+    let timeoutExitSignal: ExitSignal = null;
+    const timeoutPending = new Map<number, (payload: unknown) => void>();
+
+    const sendTimeoutRequest = (request: Record<string, unknown>) =>
+      new Promise<unknown>((resolve, reject) => {
+        if (!timeoutServer?.stdin) {
+          reject(new Error('Timeout test server stdin not available'));
+          return;
+        }
+
+        const id = request.id as number;
+        const timer = globalThis.setTimeout(() => {
+          timeoutPending.delete(id);
+          reject(new Error(`Timed out waiting for response ${id}`));
+        }, TEST_TIMEOUT_MS);
+
+        timeoutPending.set(id, (payload) => {
+          globalThis.clearTimeout(timer);
+          resolve(payload);
+        });
+
+        timeoutServer.stdin.write(`${JSON.stringify(request)}\n`);
+      });
+
+    try {
+      timeoutServer = spawn(process.execPath, [distPath], {
+        env: {
+          ...process.env,
+          PATH: `${timeoutStubDir}${path.delimiter}${process.env.PATH}`,
+          CODEX_TOOL_TIMEOUT_MS: '100',
+        },
+        stdio: ['pipe', 'pipe', 'pipe'],
+      });
+
+      timeoutServer.stdout?.setEncoding('utf8');
+      timeoutServer.stdout?.on('data', (chunk: string) => {
+        timeoutBuffer += chunk;
+        let newlineIndex = timeoutBuffer.indexOf('\n');
+
+        while (newlineIndex >= 0) {
+          const line = timeoutBuffer.slice(0, newlineIndex).trim();
+          timeoutBuffer = timeoutBuffer.slice(newlineIndex + 1);
+
+          if (line) {
+            try {
+              const payload = JSON.parse(line) as {
+                id?: number;
+                result?: unknown;
+              };
+
+              if (typeof payload.id === 'number') {
+                const resolver = timeoutPending.get(payload.id);
+                if (resolver) {
+                  resolver(payload.result ?? payload);
+                  timeoutPending.delete(payload.id);
+                }
+              }
+            } catch {
+              // Ignore non-JSON output
+            }
+          }
+
+          newlineIndex = timeoutBuffer.indexOf('\n');
+        }
+      });
+
+      timeoutServer.stderr?.on('data', () => {});
+      timeoutServer.on('exit', (code, signal) => {
+        timeoutExitCode = code;
+        timeoutExitSignal = signal;
+      });
+
+      await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 100,
+        method: 'initialize',
+        params: {
+          protocolVersion: '2024-11-05',
+          capabilities: {},
+          clientInfo: { name: 'test-timeout', version: '1' },
+        },
+      });
+
+      timeoutServer.stdin?.write(
+        `${JSON.stringify({
+          jsonrpc: JSONRPC_VERSION,
+          method: 'notifications/initialized',
+          params: {},
+        })}\n`
+      );
+
+      const timedOutResponse = (await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 101,
+        method: 'tools/call',
+        params: { name: 'codex', arguments: { prompt: 'slow-timeout' } },
+      })) as {
+        content: Array<{ text: string }>;
+        isError?: boolean;
+      };
+
+      expect(timedOutResponse.isError).toBe(true);
+      expect(timedOutResponse.content[0]?.text).toContain(
+        'Tool call timed out after 100ms'
+      );
+
+      const secondResponse = (await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 102,
+        method: 'tools/call',
+        params: { name: 'codex', arguments: { prompt: 'fast-after-timeout' } },
+      })) as {
+        content: Array<{ text: string }>;
+      };
+
+      expect(secondResponse.content[0]?.text).toBe('stub stdout\n');
+      expect({ timeoutExitCode, timeoutExitSignal }).toEqual({
+        timeoutExitCode: null,
+        timeoutExitSignal: null,
+      });
+    } finally {
+      if (
+        timeoutServer &&
+        timeoutExitCode === null &&
+        timeoutExitSignal === null
+      ) {
+        timeoutServer.kill();
+        await new Promise((resolve) => timeoutServer?.once('exit', resolve));
+      }
+      rmSync(timeoutStubDir, { recursive: true, force: true });
+    }
+  });
+
+  test('uses per-call timeout override', async () => {
+    const distPath = path.join(process.cwd(), 'dist', 'index.js');
+    await ensureBuild(distPath);
+
+    const timeoutStubDir = createTimeoutCodexStub();
+    let timeoutServer: ReturnType<typeof spawn> | null = null;
+    let timeoutBuffer = '';
+    let timeoutExitCode: number | null = null;
+    let timeoutExitSignal: ExitSignal = null;
+    const timeoutPending = new Map<number, (payload: unknown) => void>();
+
+    const sendTimeoutRequest = (request: Record<string, unknown>) =>
+      new Promise<unknown>((resolve, reject) => {
+        if (!timeoutServer?.stdin) {
+          reject(new Error('Timeout override test server stdin not available'));
+          return;
+        }
+
+        const id = request.id as number;
+        const timer = globalThis.setTimeout(() => {
+          timeoutPending.delete(id);
+          reject(new Error(`Timed out waiting for response ${id}`));
+        }, TEST_TIMEOUT_MS);
+
+        timeoutPending.set(id, (payload) => {
+          globalThis.clearTimeout(timer);
+          resolve(payload);
+        });
+
+        timeoutServer.stdin.write(`${JSON.stringify(request)}\n`);
+      });
+
+    try {
+      timeoutServer = spawn(process.execPath, [distPath], {
+        env: {
+          ...process.env,
+          PATH: `${timeoutStubDir}${path.delimiter}${process.env.PATH}`,
+          CODEX_TOOL_TIMEOUT_MS: '9000',
+        },
+        stdio: ['pipe', 'pipe', 'pipe'],
+      });
+
+      timeoutServer.stdout?.setEncoding('utf8');
+      timeoutServer.stdout?.on('data', (chunk: string) => {
+        timeoutBuffer += chunk;
+        let newlineIndex = timeoutBuffer.indexOf('\n');
+
+        while (newlineIndex >= 0) {
+          const line = timeoutBuffer.slice(0, newlineIndex).trim();
+          timeoutBuffer = timeoutBuffer.slice(newlineIndex + 1);
+
+          if (line) {
+            try {
+              const payload = JSON.parse(line) as {
+                id?: number;
+                result?: unknown;
+              };
+
+              if (typeof payload.id === 'number') {
+                const resolver = timeoutPending.get(payload.id);
+                if (resolver) {
+                  resolver(payload.result ?? payload);
+                  timeoutPending.delete(payload.id);
+                }
+              }
+            } catch {
+              // Ignore non-JSON output
+            }
+          }
+
+          newlineIndex = timeoutBuffer.indexOf('\n');
+        }
+      });
+
+      timeoutServer.stderr?.on('data', () => {});
+      timeoutServer.on('exit', (code, signal) => {
+        timeoutExitCode = code;
+        timeoutExitSignal = signal;
+      });
+
+      await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 200,
+        method: 'initialize',
+        params: {
+          protocolVersion: '2024-11-05',
+          capabilities: {},
+          clientInfo: { name: 'test-timeout-override', version: '1' },
+        },
+      });
+
+      timeoutServer.stdin?.write(
+        `${JSON.stringify({
+          jsonrpc: JSONRPC_VERSION,
+          method: 'notifications/initialized',
+          params: {},
+        })}\n`
+      );
+
+      const timedOutResponse = (await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 201,
+        method: 'tools/call',
+        params: {
+          name: 'codex',
+          arguments: { prompt: 'slow-timeout', timeoutMs: 100 },
+        },
+      })) as {
+        content: Array<{ text: string }>;
+        isError?: boolean;
+      };
+
+      expect(timedOutResponse.isError).toBe(true);
+      expect(timedOutResponse.content[0]?.text).toContain(
+        'Tool call timed out after 100ms'
+      );
+
+      const secondResponse = (await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 202,
+        method: 'tools/call',
+        params: { name: 'codex', arguments: { prompt: 'fast-after-timeout' } },
+      })) as {
+        content: Array<{ text: string }>;
+      };
+
+      expect(secondResponse.content[0]?.text).toBe('stub stdout\n');
+      expect({ timeoutExitCode, timeoutExitSignal }).toEqual({
+        timeoutExitCode: null,
+        timeoutExitSignal: null,
+      });
+    } finally {
+      if (
+        timeoutServer &&
+        timeoutExitCode === null &&
+        timeoutExitSignal === null
+      ) {
+        timeoutServer.kill();
+        await new Promise((resolve) => timeoutServer?.once('exit', resolve));
+      }
+      rmSync(timeoutStubDir, { recursive: true, force: true });
+    }
+  });
+
+  test('unblocks queued calls after a review timeout', async () => {
+    const distPath = path.join(process.cwd(), 'dist', 'index.js');
+    await ensureBuild(distPath);
+
+    const timeoutStubDir = createTimeoutCodexStub();
+    let timeoutServer: ReturnType<typeof spawn> | null = null;
+    let timeoutBuffer = '';
+    let timeoutExitCode: number | null = null;
+    let timeoutExitSignal: ExitSignal = null;
+    const timeoutPending = new Map<number, (payload: unknown) => void>();
+
+    const sendTimeoutRequest = (request: Record<string, unknown>) =>
+      new Promise<unknown>((resolve, reject) => {
+        if (!timeoutServer?.stdin) {
+          reject(new Error('Review timeout test server stdin not available'));
+          return;
+        }
+
+        const id = request.id as number;
+        const timer = globalThis.setTimeout(() => {
+          timeoutPending.delete(id);
+          reject(new Error(`Timed out waiting for response ${id}`));
+        }, TEST_TIMEOUT_MS);
+
+        timeoutPending.set(id, (payload) => {
+          globalThis.clearTimeout(timer);
+          resolve(payload);
+        });
+
+        timeoutServer.stdin.write(`${JSON.stringify(request)}\n`);
+      });
+
+    try {
+      timeoutServer = spawn(process.execPath, [distPath], {
+        env: {
+          ...process.env,
+          PATH: `${timeoutStubDir}${path.delimiter}${process.env.PATH}`,
+          CODEX_TOOL_TIMEOUT_MS: '100',
+        },
+        stdio: ['pipe', 'pipe', 'pipe'],
+      });
+
+      timeoutServer.stdout?.setEncoding('utf8');
+      timeoutServer.stdout?.on('data', (chunk: string) => {
+        timeoutBuffer += chunk;
+        let newlineIndex = timeoutBuffer.indexOf('\n');
+
+        while (newlineIndex >= 0) {
+          const line = timeoutBuffer.slice(0, newlineIndex).trim();
+          timeoutBuffer = timeoutBuffer.slice(newlineIndex + 1);
+
+          if (line) {
+            try {
+              const payload = JSON.parse(line) as {
+                id?: number;
+                result?: unknown;
+              };
+
+              if (typeof payload.id === 'number') {
+                const resolver = timeoutPending.get(payload.id);
+                if (resolver) {
+                  resolver(payload.result ?? payload);
+                  timeoutPending.delete(payload.id);
+                }
+              }
+            } catch {
+              // Ignore non-JSON output
+            }
+          }
+
+          newlineIndex = timeoutBuffer.indexOf('\n');
+        }
+      });
+
+      timeoutServer.stderr?.on('data', () => {});
+      timeoutServer.on('exit', (code, signal) => {
+        timeoutExitCode = code;
+        timeoutExitSignal = signal;
+      });
+
+      await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 300,
+        method: 'initialize',
+        params: {
+          protocolVersion: '2024-11-05',
+          capabilities: {},
+          clientInfo: { name: 'test-review-timeout', version: '1' },
+        },
+      });
+
+      timeoutServer.stdin?.write(
+        `${JSON.stringify({
+          jsonrpc: JSONRPC_VERSION,
+          method: 'notifications/initialized',
+          params: {},
+        })}\n`
+      );
+
+      const timedOutResponse = (await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 301,
+        method: 'tools/call',
+        params: {
+          name: 'review',
+          arguments: {
+            base: 'main',
+            prompt: 'slow-timeout',
+          },
+        },
+      })) as {
+        content: Array<{ text: string }>;
+        isError?: boolean;
+      };
+
+      expect(timedOutResponse.isError).toBe(true);
+      expect(timedOutResponse.content[0]?.text).toContain(
+        'Tool call timed out after 100ms'
+      );
+
+      const pingStartedAt = Date.now();
+      const pingResponse = (await sendTimeoutRequest({
+        jsonrpc: JSONRPC_VERSION,
+        id: 302,
+        method: 'tools/call',
+        params: {
+          name: 'ping',
+          arguments: { message: 'after-review-timeout' },
+        },
+      })) as {
+        content: Array<{ text: string }>;
+      };
+
+      expect(Date.now() - pingStartedAt).toBeLessThan(1000);
+      expect(pingResponse.content[0]?.text).toBe('after-review-timeout');
+      expect({ timeoutExitCode, timeoutExitSignal }).toEqual({
+        timeoutExitCode: null,
+        timeoutExitSignal: null,
+      });
+    } finally {
+      if (
+        timeoutServer &&
+        timeoutExitCode === null &&
+        timeoutExitSignal === null
+      ) {
+        timeoutServer.kill();
+        await new Promise((resolve) => timeoutServer?.once('exit', resolve));
+      }
+      rmSync(timeoutStubDir, { recursive: true, force: true });
+    }
+  });
+});
diff --git a/src/__tests__/model-selection.test.ts b/src/__tests__/model-selection.test.ts
index a261883..2211627 100644
--- a/src/__tests__/model-selection.test.ts
+++ b/src/__tests__/model-selection.test.ts
@@ -1,18 +1,17 @@
-import { CodexToolHandler } from '../tools/handlers.js';
+import type { CodexToolHandler as CodexToolHandlerType } from '../tools/handlers.js';
 import { InMemorySessionStorage } from '../session/storage.js';
-import { executeCommand } from '../utils/command.js';
 
 // Mock the command execution
 jest.mock('../utils/command.js', () => ({
   executeCommand: jest.fn(),
 }));
 
-const mockedExecuteCommand = executeCommand as jest.MockedFunction<
-  typeof executeCommand
->;
-
 describe('Model Selection and Reasoning Effort', () => {
-  let handler: CodexToolHandler;
+  let CodexToolHandler: typeof import('../tools/handlers.js').CodexToolHandler;
+  let handler: CodexToolHandlerType;
+  let mockedExecuteCommand: jest.MockedFunction<
+    typeof import('../utils/command.js').executeCommand
+  >;
   let sessionStorage: InMemorySessionStorage;
   let originalStructuredContent: string | undefined;
 
@@ -28,7 +27,14 @@ describe('Model Selection and Reasoning Effort', () => {
     }
   });
 
-  beforeEach(() => {
+  beforeEach(async () => {
+    jest.resetModules();
+    process.env.STRUCTURED_CONTENT_ENABLED = '1';
+    ({ CodexToolHandler } = await import('../tools/handlers.js'));
+    const commandModule = await import('../utils/command.js');
+    mockedExecuteCommand = commandModule.executeCommand as jest.MockedFunction<
+      typeof commandModule.executeCommand
+    >;
     sessionStorage = new InMemorySessionStorage();
     handler = new CodexToolHandler(sessionStorage);
     mockedExecuteCommand.mockClear();
@@ -36,7 +42,6 @@ describe('Model Selection and Reasoning Effort', () => {
       stdout: 'Test response',
       stderr: '',
     });
-    process.env.STRUCTURED_CONTENT_ENABLED = '1';
   });
 
   test('should pass model parameter to codex CLI', async () => {
@@ -63,7 +68,7 @@ describe('Model Selection and Reasoning Effort', () => {
       [
         'exec',
         '--model',
-        'gpt-5.3-codex',
+        'gpt-5.4',
         '-c',
         'model_reasoning_effort="high"',
         '--skip-git-repo-check',
@@ -141,7 +146,7 @@ describe('Model Selection and Reasoning Effort', () => {
       [
         'exec',
         '--model',
-        'gpt-5.3-codex',
+        'gpt-5.4',
         '-c',
         'model_reasoning_effort="minimal"',
         '--skip-git-repo-check',
@@ -162,7 +167,7 @@ describe('Model Selection and Reasoning Effort', () => {
       [
         'exec',
         '--model',
-        'gpt-5.3-codex',
+        'gpt-5.4',
         '-c',
         'model_reasoning_effort="none"',
         '--skip-git-repo-check',
@@ -183,7 +188,7 @@ describe('Model Selection and Reasoning Effort', () => {
       [
         'exec',
         '--model',
-        'gpt-5.3-codex',
+        'gpt-5.4',
         '-c',
         'model_reasoning_effort="xhigh"',
         '--skip-git-repo-check',
diff --git a/src/__tests__/resume-functionality.test.ts b/src/__tests__/resume-functionality.test.ts
index e76dd63..b991b59 100644
--- a/src/__tests__/resume-functionality.test.ts
+++ b/src/__tests__/resume-functionality.test.ts
@@ -1,18 +1,17 @@
-import { CodexToolHandler } from '../tools/handlers.js';
+import type { CodexToolHandler as CodexToolHandlerType } from '../tools/handlers.js';
 import { InMemorySessionStorage } from '../session/storage.js';
-import { executeCommand } from '../utils/command.js';
 
 // Mock the command execution
 jest.mock('../utils/command.js', () => ({
   executeCommand: jest.fn(),
 }));
 
-const mockedExecuteCommand = executeCommand as jest.MockedFunction<
-  typeof executeCommand
->;
-
 describe('Codex Resume Functionality', () => {
-  let handler: CodexToolHandler;
+  let CodexToolHandler: typeof import('../tools/handlers.js').CodexToolHandler;
+  let handler: CodexToolHandlerType;
+  let mockedExecuteCommand: jest.MockedFunction<
+    typeof import('../utils/command.js').executeCommand
+  >;
   let sessionStorage: InMemorySessionStorage;
   let originalStructuredContent: string | undefined;
 
@@ -28,7 +27,14 @@ describe('Codex Resume Functionality', () => {
     }
   });
 
-  beforeEach(() => {
+  beforeEach(async () => {
+    jest.resetModules();
+    process.env.STRUCTURED_CONTENT_ENABLED = '1';
+    ({ CodexToolHandler } = await import('../tools/handlers.js'));
+    const commandModule = await import('../utils/command.js');
+    mockedExecuteCommand = commandModule.executeCommand as jest.MockedFunction<
+      typeof commandModule.executeCommand
+    >;
     sessionStorage = new InMemorySessionStorage();
     handler = new CodexToolHandler(sessionStorage);
     mockedExecuteCommand.mockClear();
@@ -50,13 +56,7 @@ describe('Codex Resume Functionality', () => {
 
     expect(mockedExecuteCommand).toHaveBeenCalledWith(
       'codex',
-      [
-        'exec',
-        '--model',
-        'gpt-5.3-codex',
-        '--skip-git-repo-check',
-        'First message',
-      ],
+      ['exec', '--model', 'gpt-5.4', '--skip-git-repo-check', 'First message'],
       expect.any(Object)
     );
   });
@@ -166,7 +166,7 @@ describe('Codex Resume Functionality', () => {
         'exec',
         '--skip-git-repo-check',
         '-c',
-        'model="gpt-5.3-codex"',
+        'model="gpt-5.4"',
         'resume',
         'existing-codex-session-id',
         'Continue the task',
@@ -196,7 +196,7 @@ describe('Codex Resume Functionality', () => {
       [
         'exec',
         '--model',
-        'gpt-5.3-codex',
+        'gpt-5.4',
         '--skip-git-repo-check',
         'Reset and start new',
       ],
@@ -229,7 +229,7 @@ describe('Codex Resume Functionality', () => {
 
     // Should build enhanced prompt since no codex session ID
     const call = mockedExecuteCommand.mock.calls[0];
-    const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.3-codex, --skip-git-repo-check, prompt
+    const sentPrompt = call?.[1]?.[4]; // After exec, --model, gpt-5.4, --skip-git-repo-check, prompt
     expect(sentPrompt).toContain('Context:');
     expect(sentPrompt).toContain('Task: Follow up question');
   });
diff --git a/src/__tests__/runtime-config.test.ts b/src/__tests__/runtime-config.test.ts
new file mode 100644
index 0000000..c80bb0a
--- /dev/null
+++ b/src/__tests__/runtime-config.test.ts
@@ -0,0 +1,81 @@
+import path from 'path';
+import { readFileSync } from 'fs';
+
+import { CodexToolSchema } from '../types.js';
+import {
+  COMMAND_LOG_ENV_VAR,
+  SERVER_CONFIG,
+  STARTUP_LOG_ENV_VAR,
+  getServerVersion,
+  isCommandLoggingEnabled,
+  isStartupLoggingEnabled,
+} from '../runtime-config.js';
+
+describe('runtime config', () => {
+  test('uses package version for server config', () => {
+    const packageJsonPath = path.join(process.cwd(), 'package.json');
+    const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf8')) as {
+      version: string;
+    };
+
+    expect(SERVER_CONFIG.name).toBe('codex-mcp-server');
+    // Test getServerVersion with an explicit path to avoid cwd/argv sensitivity
+    expect(getServerVersion(path.join(process.cwd(), 'dist/index.js'))).toBe(
+      packageJson.version
+    );
+  });
+
+  test('startup logging is disabled by default', () => {
+    expect(isStartupLoggingEnabled({})).toBe(false);
+  });
+
+  test('startup logging can be enabled via env var', () => {
+    expect(isStartupLoggingEnabled({ [STARTUP_LOG_ENV_VAR]: '1' })).toBe(true);
+    expect(isStartupLoggingEnabled({ [STARTUP_LOG_ENV_VAR]: 'true' })).toBe(
+      true
+    );
+  });
+
+  test('command logging is disabled by default', () => {
+    expect(isCommandLoggingEnabled({})).toBe(false);
+  });
+
+  test('command logging can be enabled via env var', () => {
+    expect(isCommandLoggingEnabled({ [COMMAND_LOG_ENV_VAR]: '1' })).toBe(true);
+    expect(isCommandLoggingEnabled({ [COMMAND_LOG_ENV_VAR]: 'yes' })).toBe(
+      true
+    );
+  });
+
+  test('codex schema accepts a positive timeout override', () => {
+    const parsedArgs = CodexToolSchema.parse({
+      prompt: 'test prompt',
+      timeoutMs: 250,
+    });
+
+    expect(parsedArgs).toEqual(expect.objectContaining({ timeoutMs: 250 }));
+  });
+
+  test('codex schema rejects invalid timeout overrides', () => {
+    expect(() =>
+      CodexToolSchema.parse({
+        prompt: 'test prompt',
+        timeoutMs: 0,
+      })
+    ).toThrow();
+
+    expect(() =>
+      CodexToolSchema.parse({
+        prompt: 'test prompt',
+        timeoutMs: -1,
+      })
+    ).toThrow();
+
+    expect(() =>
+      CodexToolSchema.parse({
+        prompt: 'test prompt',
+        timeoutMs: 12.5,
+      })
+    ).toThrow();
+  });
+});
diff --git a/src/__tests__/startup-logging.test.ts b/src/__tests__/startup-logging.test.ts
new file mode 100644
index 0000000..c4a1fda
--- /dev/null
+++ b/src/__tests__/startup-logging.test.ts
@@ -0,0 +1,58 @@
+// Mock chalk to avoid ESM issues in Jest
+jest.mock('chalk', () => ({
+  __esModule: true,
+  default: {
+    blue: (text: string) => text,
+    yellow: (text: string) => text,
+    green: (text: string) => text,
+    red: (text: string) => text,
+  },
+}));
+
+import { Server } from '@modelcontextprotocol/sdk/server/index.js';
+
+import { CodexMcpServer } from '../server.js';
+import { STARTUP_LOG_ENV_VAR } from '../runtime-config.js';
+
+describe('startup logging', () => {
+  beforeEach(() => {
+    Reflect.deleteProperty(process.env, STARTUP_LOG_ENV_VAR);
+    jest.restoreAllMocks();
+  });
+
+  test('does not log startup to stderr by default', async () => {
+    const connectSpy = jest
+      .spyOn(Server.prototype, 'connect')
+      .mockResolvedValue(undefined as never);
+    const errorSpy = jest.spyOn(console, 'error').mockImplementation(() => {});
+
+    const server = new CodexMcpServer({
+      name: 'test-server',
+      version: '1.0.0',
+    });
+
+    await server.start();
+
+    expect(connectSpy).toHaveBeenCalled();
+    expect(errorSpy).not.toHaveBeenCalled();
+  });
+
+  test('logs startup when explicitly enabled', async () => {
+    process.env[STARTUP_LOG_ENV_VAR] = '1';
+
+    const connectSpy = jest
+      .spyOn(Server.prototype, 'connect')
+      .mockResolvedValue(undefined as never);
+    const errorSpy = jest.spyOn(console, 'error').mockImplementation(() => {});
+
+    const server = new CodexMcpServer({
+      name: 'test-server',
+      version: '1.0.0',
+    });
+
+    await server.start();
+
+    expect(connectSpy).toHaveBeenCalled();
+    expect(errorSpy).toHaveBeenCalledWith('test-server started successfully');
+  });
+});
diff --git a/src/__tests__/working-directory.test.ts b/src/__tests__/working-directory.test.ts
index b3bb799..896f14e 100644
--- a/src/__tests__/working-directory.test.ts
+++ b/src/__tests__/working-directory.test.ts
@@ -1,4 +1,9 @@
-import { CodexToolHandler, ReviewToolHandler } from '../tools/handlers.js';
+import {
+  CodexToolHandler,
+  HelpToolHandler,
+  ReviewToolHandler,
+  WebSearchToolHandler,
+} from '../tools/handlers.js';
 import { InMemorySessionStorage } from '../session/storage.js';
 import { executeCommand, executeCommandStreaming } from '../utils/command.js';
 
@@ -18,13 +23,17 @@ const mockedExecuteCommandStreaming =
 
 describe('Working Directory (cwd) Support', () => {
   let codexHandler: CodexToolHandler;
+  let helpHandler: HelpToolHandler;
   let reviewHandler: ReviewToolHandler;
   let sessionStorage: InMemorySessionStorage;
+  let webSearchHandler: WebSearchToolHandler;
 
   beforeEach(() => {
     sessionStorage = new InMemorySessionStorage();
     codexHandler = new CodexToolHandler(sessionStorage);
+    helpHandler = new HelpToolHandler();
     reviewHandler = new ReviewToolHandler();
+    webSearchHandler = new WebSearchToolHandler();
     mockedExecuteCommand.mockClear();
     mockedExecuteCommandStreaming.mockClear();
     mockedExecuteCommand.mockResolvedValue({
@@ -70,12 +79,15 @@ describe('Working Directory (cwd) Support', () => {
     });
 
     test('should pass cwd to streaming execution', async () => {
+      const controller = new AbortController();
+
       await reviewHandler.execute(
         {
           uncommitted: true,
           workingDirectory: '/path/to/worktree',
         },
         {
+          abortSignal: controller.signal,
           sendProgress: async () => {},
           progressToken: 'test-token',
         }
@@ -84,7 +96,10 @@ describe('Working Directory (cwd) Support', () => {
       expect(mockedExecuteCommandStreaming).toHaveBeenCalledWith(
         'codex',
         expect.arrayContaining(['-C', '/path/to/worktree']),
-        expect.objectContaining({ cwd: '/path/to/worktree' })
+        expect.objectContaining({
+          cwd: '/path/to/worktree',
+          signal: controller.signal,
+        })
       );
     });
   });
@@ -115,6 +130,19 @@ describe('Working Directory (cwd) Support', () => {
       );
     });
 
+    test('should pass bypassApprovals flag when starting a new execution', async () => {
+      await codexHandler.execute({
+        prompt: 'Test prompt',
+        bypassApprovals: true,
+      });
+
+      expect(mockedExecuteCommand).toHaveBeenCalledWith(
+        'codex',
+        expect.arrayContaining(['--dangerously-bypass-approvals-and-sandbox']),
+        expect.any(Object)
+      );
+    });
+
     test('should not apply cwd when resuming a session', async () => {
       const sessionId = sessionStorage.createSession();
       sessionStorage.setCodexConversationId(sessionId, 'conv-123');
@@ -135,5 +163,89 @@ describe('Working Directory (cwd) Support', () => {
       const args = mockedExecuteCommand.mock.calls[0][1];
       expect(args).not.toContain('-C');
     });
+
+    test('should pass bypassApprovals flag when resuming a session', async () => {
+      const sessionId = sessionStorage.createSession();
+      sessionStorage.setCodexConversationId(sessionId, 'conv-123');
+
+      await codexHandler.execute({
+        prompt: 'Continue task',
+        sessionId,
+        bypassApprovals: true,
+      });
+
+      expect(mockedExecuteCommand).toHaveBeenCalledWith(
+        'codex',
+        expect.arrayContaining([
+          '--dangerously-bypass-approvals-and-sandbox',
+          'resume',
+          'conv-123',
+          'Continue task',
+        ]),
+        expect.any(Object)
+      );
+    });
+
+    test('should pass fullAuto flag when resuming a session', async () => {
+      const sessionId = sessionStorage.createSession();
+      sessionStorage.setCodexConversationId(sessionId, 'conv-123');
+
+      await codexHandler.execute({
+        prompt: 'Continue task',
+        sessionId,
+        fullAuto: true,
+      });
+
+      expect(mockedExecuteCommand).toHaveBeenCalledWith(
+        'codex',
+        expect.arrayContaining([
+          '--full-auto',
+          'resume',
+          'conv-123',
+          'Continue task',
+        ]),
+        expect.any(Object)
+      );
+    });
+  });
+
+  describe('HelpToolHandler', () => {
+    test('should pass abortSignal to command execution', async () => {
+      const controller = new AbortController();
+
+      await helpHandler.execute(
+        {},
+        {
+          abortSignal: controller.signal,
+          sendProgress: async () => {},
+        }
+      );
+
+      expect(mockedExecuteCommand).toHaveBeenCalledWith(
+        'codex',
+        ['--help'],
+        expect.objectContaining({ signal: controller.signal })
+      );
+    });
+  });
+
+  describe('WebSearchToolHandler', () => {
+    test('should pass abortSignal to command execution', async () => {
+      const controller = new AbortController();
+
+      await webSearchHandler.execute(
+        { query: 'timeout regression' },
+        {
+          abortSignal: controller.signal,
+          sendProgress: async () => {},
+        }
+      );
+
+      expect(mockedExecuteCommand).toHaveBeenCalledWith(
+        'codex',
+        expect.arrayContaining(['--search', 'exec']),
+        expect.objectContaining({ signal: controller.signal })
+      );
+    });
   });
 });
diff --git a/src/index.ts b/src/index.ts
index 595de7e..c3dab8e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -2,11 +2,7 @@
 
 import chalk from 'chalk';
 import { CodexMcpServer } from './server.js';
-
-const SERVER_CONFIG = {
-  name: 'codex-mcp-server',
-  version: '0.0.6',
-} as const;
+import { SERVER_CONFIG } from './runtime-config.js';
 
 async function main(): Promise<void> {
   try {
diff --git a/src/runtime-config.ts b/src/runtime-config.ts
new file mode 100644
index 0000000..b7e276b
--- /dev/null
+++ b/src/runtime-config.ts
@@ -0,0 +1,59 @@
+import { existsSync, readFileSync } from 'fs';
+import path from 'path';
+
+export const STARTUP_LOG_ENV_VAR = 'CODEX_MCP_DEBUG_STARTUP' as const;
+export const COMMAND_LOG_ENV_VAR = 'CODEX_MCP_DEBUG_COMMANDS' as const;
+
+function getPackageJsonPath(entryScriptPath = process.argv[1]): string {
+  if (entryScriptPath) {
+    const candidatePaths = [
+      path.resolve(path.dirname(entryScriptPath), '../package.json'),
+      path.resolve(path.dirname(entryScriptPath), 'package.json'),
+    ];
+
+    for (const candidatePath of candidatePaths) {
+      if (existsSync(candidatePath)) {
+        return candidatePath;
+      }
+    }
+  }
+
+  return path.resolve(process.cwd(), 'package.json');
+}
+
+export function getServerVersion(entryScriptPath = process.argv[1]): string {
+  try {
+    const packageJson = JSON.parse(
+      readFileSync(getPackageJsonPath(entryScriptPath), 'utf8')
+    ) as { version?: string };
+
+    return packageJson.version ?? '0.0.0';
+  } catch {
+    return '0.0.0';
+  }
+}
+
+export const SERVER_CONFIG = {
+  name: 'codex-mcp-server',
+  version: getServerVersion(),
+} as const;
+
+function isTruthyDebugFlag(rawValue: string | undefined): boolean {
+  if (!rawValue) {
+    return false;
+  }
+
+  return ['1', 'true', 'yes', 'on'].includes(rawValue.toLowerCase());
+}
+
+export function isStartupLoggingEnabled(
+  env: Record<string, string | undefined> = process.env
+): boolean {
+  return isTruthyDebugFlag(env[STARTUP_LOG_ENV_VAR]);
+}
+
+export function isCommandLoggingEnabled(
+  env: Record<string, string | undefined> = process.env
+): boolean {
+  return isTruthyDebugFlag(env[COMMAND_LOG_ENV_VAR]);
+}
diff --git a/src/server.ts b/src/server.ts
index 4c9a69e..3143970 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -9,6 +9,7 @@ import chalk from 'chalk';
 import {
   type ServerConfig,
   type ToolName,
+  type ToolResult,
   type ToolHandlerContext,
   type ProgressToken,
   TOOLS,
@@ -16,13 +17,17 @@ import {
 import { handleError } from './errors.js';
 import { toolDefinitions } from './tools/definitions.js';
 import { toolHandlers } from './tools/handlers.js';
+import { isStartupLoggingEnabled } from './runtime-config.js';
 
 export class CodexMcpServer {
   private readonly server: Server;
   private readonly config: ServerConfig;
+  private callQueue: Promise<void> = Promise.resolve();
+  private readonly toolTimeoutMs: number;
 
   constructor(config: ServerConfig) {
     this.config = config;
+    this.toolTimeoutMs = this.getToolTimeoutMs();
     this.server = new Server(
       {
         name: config.name,
@@ -50,10 +55,11 @@ export class CodexMcpServer {
       const progressToken = request.params._meta?.progressToken as ProgressToken | undefined;
 
       // Create progress sender that uses MCP notifications
-      const createProgressContext = (): ToolHandlerContext => {
+      const createProgressContext = (abortSignal?: AbortSignal): ToolHandlerContext => {
         let progressCount = 0;
         return {
           progressToken,
+          abortSignal,
           sendProgress: async (message: string, progress?: number, total?: number) => {
             if (!progressToken) return;
 
@@ -77,13 +83,39 @@ export class CodexMcpServer {
       };
 
       try {
-        if (!this.isValidToolName(name)) {
-          throw new Error(`Unknown tool: ${name}`);
-        }
+        return await new Promise<ToolResult>((resolve, reject) => {
+          this.callQueue = this.callQueue.then(async () => {
+            let operation: Promise<ToolResult> | undefined;
 
-        const handler = toolHandlers[name];
-        const context = createProgressContext();
-        return await handler.execute(args, context);
+            try {
+              if (!this.isValidToolName(name)) {
+                throw new Error(`Unknown tool: ${name}`);
+              }
+
+              const handler = toolHandlers[name];
+              const controller = new AbortController();
+              const context = createProgressContext(controller.signal);
+              const timeoutMs = this.getRequestTimeoutMs(name, args);
+              operation = handler.execute(args, context);
+
+              resolve(
+                await this.withTimeout(
+                  operation,
+                  timeoutMs,
+                  controller
+                )
+              );
+            } catch (err) {
+              reject(err);
+            } finally {
+              // Keep the queue blocked until the child process actually exits.
+              await operation?.then(
+                () => undefined,
+                () => undefined
+              );
+            }
+          });
+        });
       } catch (error) {
         return {
           content: [
@@ -102,9 +134,65 @@ export class CodexMcpServer {
     return Object.values(TOOLS).includes(name as ToolName);
   }
 
+  private getToolTimeoutMs(): number {
+    const rawTimeout = process.env.CODEX_TOOL_TIMEOUT_MS;
+    if (!rawTimeout) {
+      return 120_000;
+    }
+
+    const parsedTimeout = Number.parseInt(rawTimeout, 10);
+    if (Number.isFinite(parsedTimeout) && parsedTimeout > 0) {
+      return parsedTimeout;
+    }
+
+    return 120_000;
+  }
+
+  private getRequestTimeoutMs(name: ToolName, args: unknown): number {
+    if (name !== TOOLS.CODEX || !args || typeof args !== 'object') {
+      return this.toolTimeoutMs;
+    }
+
+    const timeoutMs = (args as { timeoutMs?: unknown }).timeoutMs;
+    if (
+      typeof timeoutMs === 'number' &&
+      Number.isInteger(timeoutMs) &&
+      timeoutMs > 0
+    ) {
+      return timeoutMs;
+    }
+
+    return this.toolTimeoutMs;
+  }
+
+  private async withTimeout<T>(
+    operation: Promise<T>,
+    timeoutMs: number,
+    controller?: AbortController
+  ): Promise<T> {
+    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
+
+    try {
+      const timeoutPromise = new Promise<never>((_resolve, reject) => {
+        timeoutHandle = setTimeout(() => {
+          controller?.abort();
+          reject(new Error(`Tool call timed out after ${timeoutMs}ms`));
+        }, timeoutMs);
+      });
+
+      return await Promise.race([operation, timeoutPromise]);
+    } finally {
+      if (timeoutHandle) {
+        clearTimeout(timeoutHandle);
+      }
+    }
+  }
+
   async start(): Promise<void> {
     const transport = new StdioServerTransport();
     await this.server.connect(transport);
-    console.error(chalk.green(`${this.config.name} started successfully`));
+    if (isStartupLoggingEnabled()) {
+      console.error(chalk.green(`${this.config.name} started successfully`));
+    }
   }
 }
diff --git a/src/tools/definitions.ts b/src/tools/definitions.ts
index dd52a15..89da858 100644
--- a/src/tools/definitions.ts
+++ b/src/tools/definitions.ts
@@ -14,7 +14,7 @@ export const toolDefinitions: ToolDefinition[] = [
         sessionId: {
           type: 'string',
           description:
-            'Optional session ID for conversational context. Note: when resuming a session, sandbox/fullAuto/workingDirectory parameters are not applied (CLI limitation)',
+            'Optional session ID for conversational context. Note: when resuming a session, sandbox and workingDirectory are not applied (CLI limitation)',
         },
         resetSession: {
           type: 'boolean',
@@ -42,6 +42,11 @@ export const toolDefinitions: ToolDefinition[] = [
           description:
             'Enable full-auto mode: sandboxed automatic execution without approval prompts (equivalent to -a on-request --sandbox workspace-write)',
         },
+        bypassApprovals: {
+          type: 'boolean',
+          description:
+            'Skip all approval prompts and disable sandboxing. Extremely dangerous; use only in externally sandboxed environments',
+        },
         workingDirectory: {
           type: 'string',
           description:
@@ -52,6 +57,12 @@ export const toolDefinitions: ToolDefinition[] = [
           description:
             'Static MCP callback URI to pass to Codex via environment (if provided)',
         },
+        timeoutMs: {
+          type: 'integer',
+          description:
+            'Optional per-call timeout in milliseconds. Overrides CODEX_TOOL_TIMEOUT_MS for this request only',
+          minimum: 1,
+        },
       },
       required: ['prompt'],
     },
diff --git a/src/tools/handlers.ts b/src/tools/handlers.ts
index 9d7eabd..adfe200 100644
--- a/src/tools/handlers.ts
+++ b/src/tools/handlers.ts
@@ -2,12 +2,14 @@ import {
   TOOLS,
   DEFAULT_CODEX_MODEL,
   CODEX_DEFAULT_MODEL_ENV_VAR,
+  type ToolName,
   type ToolResult,
   type ToolHandlerContext,
   type CodexToolArgs,
   type ReviewToolArgs,
   type PingToolArgs,
   type WebSearchToolArgs,
+  type CommandResult,
   CodexToolSchema,
   ReviewToolSchema,
   PingToolSchema,
@@ -21,29 +23,96 @@ import {
   type ConversationTurn,
 } from '../session/storage.js';
 import { ToolExecutionError, ValidationError } from '../errors.js';
-import { executeCommand, executeCommandStreaming } from '../utils/command.js';
+import {
+  executeCommand,
+  executeCommandStreaming,
+  type CommandOptions,
+} from '../utils/command.js';
 import { ZodError } from 'zod';
 import path from 'node:path';
 
-// Default no-op context for handlers that don't need progress
+interface ToolHandler {
+  execute(args: unknown, context?: ToolHandlerContext): Promise<ToolResult>;
+}
+
 const defaultContext: ToolHandlerContext = {
   sendProgress: async () => {},
 };
 
-const isStructuredContentEnabled = (): boolean => {
-  const raw = process.env.STRUCTURED_CONTENT_ENABLED;
-  if (!raw) return false;
-  return ['1', 'true', 'yes', 'on'].includes(raw.toLowerCase());
-};
+const STRUCTURED_CONTENT_ENABLED = ['1', 'true', 'yes', 'on'].includes(
+  (process.env.STRUCTURED_CONTENT_ENABLED ?? '').toLowerCase()
+);
 
-export class CodexToolHandler {
-  constructor(private sessionStorage: SessionStorage) {}
+function getToolContext(context?: ToolHandlerContext): ToolHandlerContext {
+  return context ?? defaultContext;
+}
+
+function getSelectedModel(model?: string): string {
+  return (
+    model || process.env[CODEX_DEFAULT_MODEL_ENV_VAR] || DEFAULT_CODEX_MODEL
+  );
+}
+
+function getCommandResponse(result: CommandResult, fallback: string): string {
+  return result.stdout || result.stderr || fallback;
+}
+
+class CommandBackedToolHandler {
+  protected readonly structuredContentEnabled = STRUCTURED_CONTENT_ENABLED;
+
+  protected async executeCodexCommand(
+    cmdArgs: string[],
+    context: ToolHandlerContext,
+    options?: CommandOptions
+  ): Promise<CommandResult> {
+    const commandOptions = {
+      ...options,
+      signal: options?.signal ?? context.abortSignal,
+    };
+
+    if (context.progressToken) {
+      return executeCommandStreaming('codex', cmdArgs, {
+        ...commandOptions,
+        onProgress: (message) => {
+          void context.sendProgress(message);
+        },
+      });
+    }
+
+    return executeCommand('codex', cmdArgs, commandOptions);
+  }
+
+  protected createTextResult(
+    text: string,
+    metadata?: Record<string, unknown>
+  ): ToolResult {
+    const hasMetadata = !!metadata && Object.keys(metadata).length > 0;
+
+    return {
+      content: [
+        {
+          type: 'text',
+          text,
+          ...(hasMetadata ? { _meta: metadata } : {}),
+        },
+      ],
+      structuredContent:
+        this.structuredContentEnabled && hasMetadata ? metadata : undefined,
+    };
+  }
+}
+
+export class CodexToolHandler extends CommandBackedToolHandler {
+  constructor(private sessionStorage: SessionStorage) {
+    super();
+  }
 
   async execute(
     args: unknown,
-    context: ToolHandlerContext = defaultContext
+    context?: ToolHandlerContext
   ): Promise<ToolResult> {
     try {
+      const toolContext = getToolContext(context);
       const {
         prompt,
         sessionId,
@@ -52,6 +121,7 @@ export class CodexToolHandler {
         reasoningEffort,
         sandbox,
         fullAuto,
+        bypassApprovals,
         workingDirectory,
         callbackUri,
       }: CodexToolArgs = CodexToolSchema.parse(args);
@@ -61,7 +131,7 @@ export class CodexToolHandler {
         ? path.resolve(workingDirectory)
         : undefined;
 
-      let activeSessionId = sessionId;
+      const activeSessionId = sessionId;
       let enhancedPrompt = prompt;
 
       // Only work with sessions if explicitly requested
@@ -92,10 +162,7 @@ export class CodexToolHandler {
       }
 
       // Build command arguments with v0.75.0+ features
-      const selectedModel =
-        model ||
-        process.env[CODEX_DEFAULT_MODEL_ENV_VAR] ||
-        DEFAULT_CODEX_MODEL;
+      const selectedModel = getSelectedModel(model);
 
       const effectiveCallbackUri =
         callbackUri || process.env.CODEX_MCP_CALLBACK_URI;
@@ -115,6 +182,15 @@ export class CodexToolHandler {
           cmdArgs.push('-c', `model_reasoning_effort="${reasoningEffort}"`);
         }
 
+        if (fullAuto) {
+          cmdArgs.push('--full-auto');
+        }
+
+        // Bypass all approval prompts and sandboxing — use only in externally sandboxed envs
+        if (bypassApprovals) {
+          cmdArgs.push('--dangerously-bypass-approvals-and-sandbox');
+        }
+
         // Add resume subcommand with conversation ID and prompt
         cmdArgs.push('resume', codexConversationId, enhancedPrompt);
       } else {
@@ -139,6 +215,11 @@ export class CodexToolHandler {
           cmdArgs.push('--full-auto');
         }
 
+        // Bypass all approval prompts and sandboxing — use only in externally sandboxed envs
+        if (bypassApprovals) {
+          cmdArgs.push('--dangerously-bypass-approvals-and-sandbox');
+        }
+
         // Add working directory (v0.75.0+)
         if (resolvedWorkDir) {
           cmdArgs.push('-C', resolvedWorkDir);
@@ -151,35 +232,29 @@ export class CodexToolHandler {
       }
 
       // Send initial progress notification
-      await context.sendProgress('Starting Codex execution...', 0);
-
-      // Use streaming execution if progress is enabled
-      const useStreaming = !!context.progressToken;
+      await toolContext.sendProgress('Starting Codex execution...', 0);
       const envOverride = effectiveCallbackUri
         ? { CODEX_MCP_CALLBACK_URI: effectiveCallbackUri }
         : undefined;
 
       // Pass cwd to spawn so the child process starts in the correct directory.
       // This works around openai/codex#9084 where -C is ignored by some subcommands.
-      // Skip cwd during resume: sandbox, fullAuto, and workingDirectory are not
-      // applied in resume mode (Codex CLI limitation).
+      // Skip cwd during resume: sandbox and workingDirectory are not applied in
+      // resume mode (Codex CLI limitation).
       const cmdOptions = {
         cwd: useResume ? undefined : resolvedWorkDir,
         envOverride,
+        signal: toolContext.abortSignal,
       };
 
-      const result = useStreaming
-        ? await executeCommandStreaming('codex', cmdArgs, {
-            ...cmdOptions,
-            onProgress: (message) => {
-              // Send progress notification for each chunk of output
-              context.sendProgress(message);
-            },
-          })
-        : await executeCommand('codex', cmdArgs, cmdOptions);
+      const result = await this.executeCodexCommand(
+        cmdArgs,
+        toolContext,
+        cmdOptions
+      );
 
       // Codex CLI may output to stderr, so check both
-      const response = result.stdout || result.stderr || 'No output from Codex';
+      const response = getCommandResponse(result, 'No output from Codex');
 
       // Extract conversation/session ID from new conversations for future resume
       // Codex CLI outputs have varied between "session id" and "conversation id"
@@ -216,25 +291,13 @@ ${result.stdout || ''}`.trim();
       // - content[0]._meta: For Claude Code compatibility (avoids structuredContent bug)
       // - structuredContent: For other MCP clients that properly support it
       const metadata: Record<string, unknown> = {
-        ...(threadId && { threadId }),
-        ...(selectedModel && { model: selectedModel }),
+        model: selectedModel,
         ...(activeSessionId && { sessionId: activeSessionId }),
         ...(effectiveCallbackUri && { callbackUri: effectiveCallbackUri }),
+        ...(threadId && { threadId }),
       };
 
-      return {
-        content: [
-          {
-            type: 'text',
-            text: response,
-            _meta: metadata,
-          },
-        ],
-        structuredContent:
-          isStructuredContentEnabled() && Object.keys(metadata).length > 0
-            ? metadata
-            : undefined,
-      };
+      return this.createTextResult(response, metadata);
     } catch (error) {
       if (error instanceof ValidationError) {
         throw error;
@@ -277,10 +340,7 @@ ${result.stdout || ''}`.trim();
 }
 
 export class PingToolHandler {
-  async execute(
-    args: unknown,
-    _context: ToolHandlerContext = defaultContext
-  ): Promise<ToolResult> {
+  async execute(args: unknown): Promise<ToolResult> {
     try {
       const { message = 'pong' }: PingToolArgs = PingToolSchema.parse(args);
 
@@ -305,24 +365,20 @@ export class PingToolHandler {
   }
 }
 
-export class HelpToolHandler {
+export class HelpToolHandler extends CommandBackedToolHandler {
   async execute(
     args: unknown,
-    _context: ToolHandlerContext = defaultContext
+    context?: ToolHandlerContext
   ): Promise<ToolResult> {
     try {
+      const toolContext = getToolContext(context);
       HelpToolSchema.parse(args);
 
-      const result = await executeCommand('codex', ['--help']);
+      const result = await this.executeCodexCommand(['--help'], toolContext);
 
-      return {
-        content: [
-          {
-            type: 'text',
-            text: result.stdout || 'No help information available',
-          },
-        ],
-      };
+      return this.createTextResult(
+        getCommandResponse(result, 'No help information available')
+      );
     } catch (error) {
       if (error instanceof ZodError) {
         throw new ValidationError(TOOLS.HELP, error.message);
@@ -339,10 +395,7 @@ export class HelpToolHandler {
 export class ListSessionsToolHandler {
   constructor(private sessionStorage: SessionStorage) {}
 
-  async execute(
-    args: unknown,
-    _context: ToolHandlerContext = defaultContext
-  ): Promise<ToolResult> {
+  async execute(args: unknown): Promise<ToolResult> {
     try {
       ListSessionsToolSchema.parse(args);
 
@@ -378,12 +431,13 @@ export class ListSessionsToolHandler {
   }
 }
 
-export class ReviewToolHandler {
+export class ReviewToolHandler extends CommandBackedToolHandler {
   async execute(
     args: unknown,
-    context: ToolHandlerContext = defaultContext
+    context?: ToolHandlerContext
   ): Promise<ToolResult> {
     try {
+      const toolContext = getToolContext(context);
       const {
         prompt,
         uncommitted,
@@ -414,10 +468,7 @@ export class ReviewToolHandler {
       }
 
       // Add model parameter via config
-      const selectedModel =
-        model ||
-        process.env[CODEX_DEFAULT_MODEL_ENV_VAR] ||
-        DEFAULT_CODEX_MODEL;
+      const selectedModel = getSelectedModel(model);
       cmdArgs.push('-c', `model="${selectedModel}"`);
 
       cmdArgs.push('review');
@@ -445,24 +496,21 @@ export class ReviewToolHandler {
       }
 
       // Send initial progress notification
-      await context.sendProgress('Starting code review...', 0);
-
-      const useStreaming = !!context.progressToken;
+      await toolContext.sendProgress('Starting code review...', 0);
       // Pass cwd to spawn so the child process starts in the correct directory.
       // This works around openai/codex#9084 where -C is ignored by `review`.
       const cmdOptions = { cwd: resolvedWorkDir };
-      const result = useStreaming
-        ? await executeCommandStreaming('codex', cmdArgs, {
-            ...cmdOptions,
-            onProgress: (message) => {
-              context.sendProgress(message);
-            },
-          })
-        : await executeCommand('codex', cmdArgs, cmdOptions);
+      const result = await this.executeCodexCommand(
+        cmdArgs,
+        toolContext,
+        cmdOptions
+      );
 
       // Codex CLI outputs to stderr, so check both stdout and stderr
-      const response =
-        result.stdout || result.stderr || 'No review output from Codex';
+      const response = getCommandResponse(
+        result,
+        'No review output from Codex'
+      );
 
       // Prepare metadata for dual approach:
       // - content[0]._meta: For Claude Code compatibility (avoids structuredContent bug)
@@ -473,16 +521,7 @@ export class ReviewToolHandler {
         ...(commit && { commit }),
       };
 
-      return {
-        content: [
-          {
-            type: 'text',
-            text: response,
-            _meta: metadata,
-          },
-        ],
-        structuredContent: isStructuredContentEnabled() ? metadata : undefined,
-      };
+      return this.createTextResult(response, metadata);
     } catch (error) {
       if (error instanceof ZodError) {
         throw new ValidationError(TOOLS.REVIEW, error.message);
@@ -503,12 +542,13 @@ export class ReviewToolHandler {
  * WebSearchToolHandler - Perform web search via Codex CLI with --search flag
  * Enables Codex's native web_search tool by using --search before exec subcommand
  */
-export class WebSearchToolHandler {
+export class WebSearchToolHandler extends CommandBackedToolHandler {
   async execute(
     args: unknown,
-    context: ToolHandlerContext = defaultContext
+    context?: ToolHandlerContext
   ): Promise<ToolResult> {
     try {
+      const toolContext = getToolContext(context);
       const {
         query,
         numResults = 10,
@@ -516,7 +556,7 @@ export class WebSearchToolHandler {
       }: WebSearchToolArgs = WebSearchToolSchema.parse(args);
 
       // Send initial progress notification
-      await context.sendProgress(`Searching for: ${query}...`, 0);
+      await toolContext.sendProgress(`Searching for: ${query}...`, 0);
 
       // Build direct search prompt that leverages the enabled web_search tool
       const searchPrompt = `Search for: ${query}. Provide ${numResults} key findings.${searchDepth === 'full' ? ' Include detailed analysis and context.' : ''}`;
@@ -529,20 +569,13 @@ export class WebSearchToolHandler {
         searchPrompt,
       ];
 
-      // Use streaming execution if progress is enabled
-      const useStreaming = !!context.progressToken;
-
-      const result = useStreaming
-        ? await executeCommandStreaming('codex', cmdArgs, {
-            onProgress: (message) => {
-              context.sendProgress(message);
-            },
-          })
-        : await executeCommand('codex', cmdArgs);
+      const result = await this.executeCodexCommand(cmdArgs, toolContext);
 
       // Get response from stdout or stderr (Codex may output to either)
-      const response =
-        result.stdout || result.stderr || 'No search output from Codex';
+      const response = getCommandResponse(
+        result,
+        'No search output from Codex'
+      );
 
       // Prepare metadata
       const metadata: Record<string, unknown> = {
@@ -552,16 +585,7 @@ export class WebSearchToolHandler {
         timestamp: new Date().toISOString(),
       };
 
-      return {
-        content: [
-          {
-            type: 'text',
-            text: response,
-            _meta: metadata,
-          },
-        ],
-        structuredContent: isStructuredContentEnabled() ? metadata : undefined,
-      };
+      return this.createTextResult(response, metadata);
     } catch (error) {
       if (error instanceof ZodError) {
         throw new ValidationError(TOOLS.WEBSEARCH, error.message);
@@ -578,11 +602,11 @@ export class WebSearchToolHandler {
 // Tool handler registry
 const sessionStorage = new InMemorySessionStorage();
 
-export const toolHandlers = {
+export const toolHandlers: Record<ToolName, ToolHandler> = {
   [TOOLS.CODEX]: new CodexToolHandler(sessionStorage),
   [TOOLS.REVIEW]: new ReviewToolHandler(),
   [TOOLS.PING]: new PingToolHandler(),
   [TOOLS.HELP]: new HelpToolHandler(),
   [TOOLS.LIST_SESSIONS]: new ListSessionsToolHandler(sessionStorage),
   [TOOLS.WEBSEARCH]: new WebSearchToolHandler(),
-} as const;
+};
diff --git a/src/types.ts b/src/types.ts
index a940f81..bac0ebe 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -13,11 +13,13 @@ export const TOOLS = {
 export type ToolName = typeof TOOLS[keyof typeof TOOLS];
 
 // Codex model constants
-export const DEFAULT_CODEX_MODEL = 'gpt-5.3-codex' as const;
+export const DEFAULT_CODEX_MODEL = 'gpt-5.4' as const;
 export const CODEX_DEFAULT_MODEL_ENV_VAR = 'CODEX_DEFAULT_MODEL' as const;
 
-// Available model options (for documentation/reference)
-export const AVAILABLE_CODEX_MODELS = [
+// Common model examples shown in tool descriptions. Validation stays free-form
+// so callers can pass any Codex CLI-supported model string.
+const DOCUMENTED_CODEX_MODELS = [
+  'gpt-5.4',
   'gpt-5.3-codex',
   'gpt-5.2-codex',
   'gpt-5.1-codex',
@@ -31,7 +33,7 @@ export const AVAILABLE_CODEX_MODELS = [
 
 // Helper function to generate model description
 export const getModelDescription = (toolType: 'codex' | 'review') => {
-  const modelList = AVAILABLE_CODEX_MODELS.join(', ');
+  const modelList = DOCUMENTED_CODEX_MODELS.join(', ');
   if (toolType === 'codex') {
     return `Specify which model to use (defaults to ${DEFAULT_CODEX_MODEL}). Options: ${modelList}`;
   }
@@ -90,7 +92,7 @@ export const SandboxMode = z.enum([
 ]);
 
 // Zod schemas for tool arguments
-export const CodexToolSchema = z.object({
+const CodexToolBaseSchema = z.object({
   prompt: z.string(),
   sessionId: z
     .string()
@@ -104,8 +106,30 @@ export const CodexToolSchema = z.object({
   reasoningEffort: z.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']).optional(),
   sandbox: SandboxMode.optional(),
   fullAuto: z.boolean().optional(),
+  bypassApprovals: z.boolean().optional(),
   workingDirectory: z.string().optional(),
   callbackUri: z.string().optional(),
+  timeoutMs: z.number().int().positive().optional(),
+});
+
+export const CodexToolSchema = CodexToolBaseSchema.superRefine((value, ctx) => {
+  if (value.bypassApprovals && value.sandbox) {
+    ctx.addIssue({
+      code: 'custom',
+      path: ['bypassApprovals'],
+      message:
+        'bypassApprovals cannot be combined with sandbox because it disables sandboxing entirely',
+    });
+  }
+
+  if (value.bypassApprovals && value.fullAuto) {
+    ctx.addIssue({
+      code: 'custom',
+      path: ['bypassApprovals'],
+      message:
+        'bypassApprovals cannot be combined with fullAuto because fullAuto requires sandboxed execution',
+    });
+  }
 });
 
 // Review tool schema
@@ -152,5 +176,6 @@ export type ProgressToken = string | number;
 // Context passed to tool handlers for sending progress notifications
 export interface ToolHandlerContext {
   progressToken?: ProgressToken;
+  abortSignal?: AbortSignal;
   sendProgress: (message: string, progress?: number, total?: number) => Promise<void>;
 }
diff --git a/src/utils/command.ts b/src/utils/command.ts
index c5571a0..9454fe7 100644
--- a/src/utils/command.ts
+++ b/src/utils/command.ts
@@ -1,10 +1,12 @@
-type ProcessEnv = Record<string, string | undefined>;
 import { type SpawnOptionsWithoutStdio, spawn } from 'child_process';
 import { Buffer } from 'node:buffer';
 import path from 'node:path';
 import chalk from 'chalk';
 import { CommandExecutionError } from '../errors.js';
 import { type CommandResult } from '../types.js';
+import { isCommandLoggingEnabled } from '../runtime-config.js';
+
+type ProcessEnv = Record<string, string | undefined>;
 
 /**
  * Escape argument for Windows shell (cmd.exe)
@@ -30,143 +32,152 @@ export type ProgressCallback = (message: string) => void;
 export interface CommandOptions {
   envOverride?: ProcessEnv;
   cwd?: string;
+  signal?: AbortSignal;
+  onProgress?: ProgressCallback;
 }
 
-export interface StreamingCommandOptions extends CommandOptions {
-  onProgress?: ProgressCallback;
+export type StreamingCommandOptions = CommandOptions;
+
+type StreamName = 'stdout' | 'stderr';
+type SpawnedChildProcess = ReturnType<typeof spawn>;
+
+const FORCE_KILL_TIMEOUT_MS = 2000;
+const PROGRESS_DEBOUNCE_MS = 100;
+
+function getEscapedArgs(args: string[]): string[] {
+  return isWindows ? args.map(escapeArgForWindows) : args;
 }
 
-export async function executeCommand(
-  file: string,
-  args: string[] = [],
-  options?: CommandOptions
-): Promise<CommandResult> {
-  return new Promise((resolve, reject) => {
-    // Escape args for Windows shell
-    const escapedArgs = isWindows ? args.map(escapeArgForWindows) : args;
+function getSpawnOptions(
+  options: CommandOptions = {}
+): SpawnOptionsWithoutStdio {
+  const spawnOptions: SpawnOptionsWithoutStdio = {
+    shell: isWindows,
+    env: options.envOverride
+      ? { ...process.env, ...options.envOverride }
+      : process.env,
+  };
+
+  if (options.cwd) {
+    spawnOptions.cwd = path.resolve(options.cwd);
+  }
 
-    console.error(chalk.blue('Executing:'), file, escapedArgs.join(' '));
+  if (options.signal && !isWindows) {
+    // Create an isolated process group so abort can terminate the full tree.
+    spawnOptions.detached = true;
+  }
 
-    const spawnOptions: SpawnOptionsWithoutStdio = {
-      shell: isWindows,
-      env: options?.envOverride
-        ? { ...process.env, ...options.envOverride }
-        : process.env,
-    };
+  return spawnOptions;
+}
 
-    if (options?.cwd) {
-      spawnOptions.cwd = path.resolve(options.cwd);
+function sendKillSignal(
+  child: SpawnedChildProcess,
+  signal: 'SIGTERM' | 'SIGKILL',
+  isWindowsPlatform: boolean
+): void {
+  child.kill(signal);
+  if (!isWindowsPlatform && child.pid) {
+    try {
+      process.kill(-child.pid, signal);
+    } catch {
+      // Ignore if process group no longer exists.
     }
+  }
+}
 
-    const child = spawn(file, escapedArgs, spawnOptions);
+function killChild(
+  child: SpawnedChildProcess,
+  isWindowsPlatform: boolean
+): void {
+  sendKillSignal(child, 'SIGTERM', isWindowsPlatform);
+  const forceKillTimer = globalThis.setTimeout(
+    () => sendKillSignal(child, 'SIGKILL', isWindowsPlatform),
+    FORCE_KILL_TIMEOUT_MS
+  );
+  child.once('exit', () => globalThis.clearTimeout(forceKillTimer));
+}
 
-    // Close stdin to prevent processes like codex exec from waiting forever for input
-    // When spawned with stdio pipe, codex waits for stdin EOF that never arrives
-    child.stdin?.end();
+function bindAbortSignal(
+  child: SpawnedChildProcess,
+  signal?: AbortSignal
+): void {
+  if (!signal) {
+    return;
+  }
 
-    let stdout = '';
-    let stderr = '';
-    let stdoutTruncated = false;
-    let stderrTruncated = false;
+  if (signal.aborted) {
+    killChild(child, isWindows);
+    return;
+  }
 
-    child.stdout.on('data', (data: Buffer) => {
-      if (!stdoutTruncated) {
-        const chunk = data.toString();
-        if (stdout.length + chunk.length > MAX_BUFFER_SIZE) {
-          stdout += chunk.slice(0, MAX_BUFFER_SIZE - stdout.length);
-          stdoutTruncated = true;
-          console.error(chalk.yellow('Warning: stdout truncated at 10MB'));
-        } else {
-          stdout += chunk;
-        }
-      }
-    });
+  signal.addEventListener('abort', () => killChild(child, isWindows), {
+    once: true,
+  });
+}
 
-    child.stderr.on('data', (data: Buffer) => {
-      if (!stderrTruncated) {
-        const chunk = data.toString();
-        if (stderr.length + chunk.length > MAX_BUFFER_SIZE) {
-          stderr += chunk.slice(0, MAX_BUFFER_SIZE - stderr.length);
-          stderrTruncated = true;
-          console.error(chalk.yellow('Warning: stderr truncated at 10MB'));
-        } else {
-          stderr += chunk;
-        }
-      }
-    });
+function appendOutput(
+  currentValue: string,
+  chunk: string,
+  streamName: StreamName
+): { nextValue: string; truncated: boolean } {
+  if (currentValue.length + chunk.length > MAX_BUFFER_SIZE) {
+    console.error(chalk.yellow(`Warning: ${streamName} truncated at 10MB`));
+    return {
+      nextValue:
+        currentValue + chunk.slice(0, MAX_BUFFER_SIZE - currentValue.length),
+      truncated: true,
+    };
+  }
 
-    child.on('close', (code) => {
-      if (stderr) {
-        console.error(chalk.yellow('Command stderr:'), stderr);
-      }
+  return {
+    nextValue: currentValue + chunk,
+    truncated: false,
+  };
+}
 
-      // Accept exit code 0 or if we got stdout/stderr output
-      // Note: codex CLI writes most output to stderr, so we must check both
-      if (code === 0 || stdout || stderr) {
-        if (code !== 0 && (stdout || stderr)) {
-          console.error(
-            chalk.yellow('Command failed but produced output, using output')
-          );
-        }
-        resolve({ stdout, stderr });
-      } else {
-        reject(
-          new CommandExecutionError(
-            [file, ...args].join(' '),
-            `Command failed with exit code ${code}`,
-            new Error(stderr || 'Unknown error')
-          )
-        );
-      }
-    });
+function createProgressReporter(
+  onProgress?: ProgressCallback
+): ProgressCallback | undefined {
+  if (!onProgress) {
+    return undefined;
+  }
 
-    child.on('error', (error) => {
-      reject(
-        new CommandExecutionError(
-          [file, ...args].join(' '),
-          'Command execution failed',
-          error
-        )
-      );
-    });
-  });
+  let lastProgressTime = 0;
+  return (message: string) => {
+    const now = Date.now();
+    if (now - lastProgressTime >= PROGRESS_DEBOUNCE_MS) {
+      onProgress(message);
+      lastProgressTime = now;
+    }
+  };
 }
 
-/**
- * Execute a command with streaming output support.
- * Calls onProgress callback with each chunk of output for real-time feedback.
- *
- * Note: Unlike executeCommand, this function treats stderr output as success
- * because tools like codex write their primary output to stderr. This is
- * intentional for streaming use cases where we want to capture all output.
- */
-export async function executeCommandStreaming(
+function createCommandError(
+  file: string,
+  args: string[],
+  message: string,
+  cause: unknown
+): CommandExecutionError {
+  return new CommandExecutionError([file, ...args].join(' '), message, cause);
+}
+
+export async function executeCommand(
   file: string,
   args: string[] = [],
-  options: StreamingCommandOptions = {}
+  options: CommandOptions = {}
 ): Promise<CommandResult> {
   return new Promise((resolve, reject) => {
-    // Escape args for Windows shell
-    const escapedArgs = isWindows ? args.map(escapeArgForWindows) : args;
-
-    console.error(
-      chalk.blue('Executing (streaming):'),
-      file,
-      escapedArgs.join(' ')
-    );
-
-    const spawnOptions: SpawnOptionsWithoutStdio = {
-      shell: isWindows, // Use shell on Windows to inherit PATH correctly
-      env: options.envOverride
-        ? { ...process.env, ...options.envOverride }
-        : process.env,
-    };
+    const escapedArgs = getEscapedArgs(args);
+    const isStreaming = !!options.onProgress;
+    const logLabel = isStreaming ? 'Executing (streaming):' : 'Executing:';
+    const commandLoggingEnabled = isCommandLoggingEnabled();
 
-    if (options.cwd) {
-      spawnOptions.cwd = path.resolve(options.cwd);
+    if (commandLoggingEnabled) {
+      console.error(chalk.blue(logLabel), file, escapedArgs.join(' '));
     }
 
-    const child = spawn(file, escapedArgs, spawnOptions);
+    const child = spawn(file, escapedArgs, getSpawnOptions(options));
+    bindAbortSignal(child, options.signal);
 
     // Close stdin to prevent processes like codex exec from waiting forever for input
     // When spawned with stdio pipe, codex waits for stdin EOF that never arrives
@@ -176,54 +187,32 @@ export async function executeCommandStreaming(
     let stderr = '';
     let stdoutTruncated = false;
     let stderrTruncated = false;
-    let lastProgressTime = 0;
-    const PROGRESS_DEBOUNCE_MS = 100; // Debounce progress updates
-
-    const sendProgress = (message: string) => {
-      if (!options.onProgress) return;
-
-      const now = Date.now();
-      // Debounce to avoid flooding with progress updates
-      if (now - lastProgressTime >= PROGRESS_DEBOUNCE_MS) {
-        options.onProgress(message);
-        lastProgressTime = now;
-      }
-    };
+    const reportProgress = createProgressReporter(options.onProgress);
 
     child.stdout?.on('data', (data: Buffer) => {
       const chunk = data.toString();
       if (!stdoutTruncated) {
-        if (stdout.length + chunk.length > MAX_BUFFER_SIZE) {
-          stdout += chunk.slice(0, MAX_BUFFER_SIZE - stdout.length);
-          stdoutTruncated = true;
-          console.error(chalk.yellow('Warning: stdout truncated at 10MB'));
-        } else {
-          stdout += chunk;
-        }
+        const nextOutput = appendOutput(stdout, chunk, 'stdout');
+        stdout = nextOutput.nextValue;
+        stdoutTruncated = nextOutput.truncated;
       }
-      sendProgress(chunk.trim());
+      reportProgress?.(chunk.trim());
     });
 
     child.stderr?.on('data', (data: Buffer) => {
       const chunk = data.toString();
       if (!stderrTruncated) {
-        if (stderr.length + chunk.length > MAX_BUFFER_SIZE) {
-          stderr += chunk.slice(0, MAX_BUFFER_SIZE - stderr.length);
-          stderrTruncated = true;
-          console.error(chalk.yellow('Warning: stderr truncated at 10MB'));
-        } else {
-          stderr += chunk;
-        }
+        const nextOutput = appendOutput(stderr, chunk, 'stderr');
+        stderr = nextOutput.nextValue;
+        stderrTruncated = nextOutput.truncated;
       }
-      // Also send stderr as progress - codex outputs to stderr
-      sendProgress(chunk.trim());
+      reportProgress?.(chunk.trim());
     });
 
     child.on('close', (code) => {
-      // Send final progress if there's any remaining output
       if (options.onProgress && (stdout || stderr)) {
         const finalOutput = stdout || stderr;
-        const lastChunk = finalOutput.slice(-500); // Last 500 chars
+        const lastChunk = finalOutput.slice(-500);
         if (lastChunk.trim()) {
           options.onProgress(
             `[Completed] ${lastChunk.trim().slice(0, 200)}...`
@@ -231,33 +220,56 @@ export async function executeCommandStreaming(
         }
       }
 
+      if (!isStreaming && stderr && commandLoggingEnabled) {
+        console.error(chalk.yellow('Command stderr:'), stderr);
+      }
+
       if (code === 0 || stdout || stderr) {
-        // Success or we have output (treat as success like the original)
-        if (code !== 0 && (stdout || stderr)) {
+        if (code !== 0 && (stdout || stderr) && commandLoggingEnabled) {
           console.error(
             chalk.yellow('Command failed but produced output, using output')
           );
         }
         resolve({ stdout, stderr });
-      } else {
-        reject(
-          new CommandExecutionError(
-            [file, ...args].join(' '),
-            `Command exited with code ${code}`,
-            new Error(`Exit code: ${code}`)
-          )
-        );
+        return;
       }
-    });
 
-    child.on('error', (error) => {
       reject(
-        new CommandExecutionError(
-          [file, ...args].join(' '),
-          'Command execution failed',
-          error
-        )
+        isStreaming
+          ? createCommandError(
+              file,
+              args,
+              `Command exited with code ${code}`,
+              new Error(`Exit code: ${code}`)
+            )
+          : createCommandError(
+              file,
+              args,
+              `Command failed with exit code ${code}`,
+              new Error(stderr || 'Unknown error')
+            )
       );
     });
+
+    child.on('error', (error) => {
+      reject(createCommandError(file, args, 'Command execution failed', error));
+    });
   });
 }
+
+/**
+ * Execute a command with streaming output support.
+ * Calls onProgress callback with each chunk of output for real-time feedback
+ * by delegating to executeCommand with onProgress set.
+ *
+ * Note: Commands that produce stdout or stderr are treated as success even if
+ * exit code is non-zero. This preserves codex CLI behavior, which frequently
+ * writes primary output to stderr.
+ */
+export async function executeCommandStreaming(
+  file: string,
+  args: string[] = [],
+  options: StreamingCommandOptions = {}
+): Promise<CommandResult> {
+  return executeCommand(file, args, options);
+}