Merged
2 changes: 1 addition & 1 deletion docs/sdk/concepts/connecting-servers.mdx
@@ -176,7 +176,7 @@ const manager = new MCPClientManager({

await manager.connectToServer("myServer");

// Get tools in AI SDK format
// Get tools for TestAgent
const tools = await manager.getTools();

// Create agent with those tools
37 changes: 37 additions & 0 deletions docs/sdk/concepts/testing-with-llms.mdx
@@ -143,6 +143,43 @@ const agent = new TestAgent({
});
```

## Control Multi-Step Loops with stopWhen

Use `stopWhen` to end the multi-step loop once a condition holds after a step completes:

```typescript
import { hasToolCall } from "@mcpjam/sdk";

// Stop after the step where the tool is called
const result = await agent.prompt("Search for open tasks", {
stopWhen: hasToolCall("search_tasks"),
});

expect(result.hasToolCall("search_tasks")).toBe(true);
```

<Tip>
`stopWhen` does not skip tool execution. It controls whether the prompt loop continues after the current step completes, and `TestAgent` also applies `stepCountIs(maxSteps)` as a safety guard.
</Tip>
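
The stop semantics described above can be sketched with minimal local stand-ins. These are simplified illustrations of how conditions compose, not the real `hasToolCall` and `stepCountIs` helpers from the SDK:

```typescript
// Minimal stand-ins (NOT the real SDK helpers) illustrating stopWhen
// composition: the loop stops as soon as ANY condition returns true.
type Step = { toolCalls: Array<{ toolName: string }> };
type StopConditionSketch = (opts: { steps: Step[] }) => boolean;

// True once any completed step has called the named tool.
const hasToolCallSketch =
  (toolName: string): StopConditionSketch =>
  ({ steps }) =>
    steps.some((s) => s.toolCalls.some((c) => c.toolName === toolName));

// True once the step count reaches the given limit (the maxSteps guard).
const stepCountIsSketch =
  (count: number): StopConditionSketch =>
  ({ steps }) =>
    steps.length >= count;

// Mirrors passing stopWhen: [a, b] — any condition being true stops the loop.
function shouldStop(conditions: StopConditionSketch[], steps: Step[]): boolean {
  return conditions.some((condition) => condition({ steps }));
}
```

This is why the `stepCountIs(maxSteps)` safety guard always fires even when your own conditions never match: it is just one more condition in the same "any true stops" set.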

## Bound Prompt Runtime with timeout

Use `timeout` when you want to bound how long `TestAgent.prompt()` can run:

```typescript
const result = await agent.prompt("Run a long workflow", {
timeout: { totalMs: 10_000, stepMs: 2_500 },
});

if (result.hasError()) {
console.error(result.getError());
}
```

<Tip>
`timeout` accepts `number`, `totalMs`, `stepMs`, and `chunkMs`. In practice, `number` and `totalMs` cap the full prompt, `stepMs` caps each step, and `chunkMs` mainly matters in streaming flows. The runtime creates an internal abort signal, so tools can stop early if their implementation respects the provided `abortSignal`.
</Tip>
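
Since the runtime can only cancel tools that cooperate, a tool body should check the abort signal it receives. The following is a minimal sketch of that pattern (`runWithAbort` is an illustrative helper, not an SDK export):

```typescript
// Sketch of a cooperative tool body: bail out when the abort signal fires
// instead of letting the underlying work run past the prompt timeout.
async function runWithAbort(
  work: () => Promise<string>,
  abortSignal?: AbortSignal
): Promise<string> {
  // If the prompt already timed out, don't start the work at all.
  if (abortSignal?.aborted) {
    throw new Error("Tool aborted before starting");
  }
  return await new Promise<string>((resolve, reject) => {
    const onAbort = () => reject(new Error("Tool aborted"));
    abortSignal?.addEventListener("abort", onAbort, { once: true });
    work().then(
      (value) => {
        abortSignal?.removeEventListener("abort", onAbort);
        resolve(value);
      },
      (error) => {
        abortSignal?.removeEventListener("abort", onAbort);
        reject(error);
      }
    );
  });
}
```

A tool that ignores the signal still "succeeds" from its own point of view; the prompt simply returns an error result while the orphaned work finishes in the background.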

## Writing Assertions

Use validators to assert tool call behavior:
2 changes: 1 addition & 1 deletion docs/sdk/reference/llm-providers.mdx
@@ -384,7 +384,7 @@ const { provider, modelId } = parseLLMString("anthropic/claude-sonnet-4-20250514

### createModelFromString()

Create a Vercel AI SDK model directly.
Create a provider model instance directly.

```typescript
import { createModelFromString } from "@mcpjam/sdk";
2 changes: 1 addition & 1 deletion docs/sdk/reference/prompt-result.mdx
@@ -328,7 +328,7 @@ getMessages(): CoreMessage[]

#### Returns

`CoreMessage[]` - Vercel AI SDK message format.
`CoreMessage[]` - The full conversation message format used by the SDK.

#### Example

71 changes: 70 additions & 1 deletion docs/sdk/reference/test-agent.mdx
@@ -4,7 +4,7 @@ description: "API reference for TestAgent"
icon: "book"
---

The `TestAgent` class wraps LLM providers via the Vercel AI SDK, enabling you to run prompts with MCP tools. It handles the agentic loop and returns rich result objects.
The `TestAgent` class runs prompts with MCP tools enabled. It handles the multi-step prompt loop and returns rich result objects.

## Import

@@ -76,6 +76,8 @@ prompt(
| Property | Type | Description |
|----------|------|-------------|
| `context` | `PromptResult \| PromptResult[]` | Previous result(s) for multi-turn conversations |
| `stopWhen` | `StopCondition<ToolSet> \| Array<StopCondition<ToolSet>>` | Additional conditions for the multi-step prompt loop. Tools still execute normally. `TestAgent` always applies `stepCountIs(maxSteps)` as a safety guard. |
| `timeout` | `number \| { totalMs?: number; stepMs?: number; chunkMs?: number }` | Bounds prompt runtime. `number` and `totalMs` cap the full prompt, `stepMs` caps each generation step, and `chunkMs` is accepted for parity but is mainly relevant to streaming APIs. |

#### Returns

@@ -84,6 +86,8 @@
#### Example

```typescript
import { hasToolCall } from "@mcpjam/sdk";

// Simple prompt
const result = await agent.prompt("Add 2 and 3");

@@ -93,12 +97,31 @@ const r2 = await agent.prompt("Mark it complete", { context: r1 });

// Multiple context items
const r3 = await agent.prompt("Show summary", { context: [r1, r2] });

// Stop the loop after the step where a tool is called
const r4 = await agent.prompt("Search for tasks", {
stopWhen: hasToolCall("search_tasks"),
});
console.log(r4.hasToolCall("search_tasks"));

// Bound prompt runtime
const r5 = await agent.prompt("Run a long workflow", {
timeout: { totalMs: 10_000, stepMs: 2_500 },
});

if (r5.hasError()) {
console.error(r5.getError());
}
```

<Note>
`prompt()` never throws exceptions. Errors are captured in the `PromptResult`. Check `result.hasError()` to detect failures.
</Note>

<Info>
`timeout` bounds prompt runtime. The runtime creates an internal abort signal, so tools can stop early if their implementation respects the provided `abortSignal`. If a tool ignores that signal, its underlying work may continue briefly after the prompt returns an error result.
</Info>
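
One common way for a tool to respect that signal is to forward it to I/O. A hypothetical tool body (`fetchTasks` is an illustrative name, not an SDK export) might pass it straight to `fetch` so a prompt timeout also cancels the in-flight HTTP request:

```typescript
// Hypothetical tool body: forwarding the abort signal to fetch means a
// prompt timeout cancels the HTTP request instead of leaving it running.
async function fetchTasks(
  url: string,
  abortSignal?: AbortSignal
): Promise<unknown> {
  const response = await fetch(url, { signal: abortSignal });
  if (!response.ok) {
    throw new Error(`Request failed: ${response.status}`);
  }
  return response.json();
}
```
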

---

## Model String Format
@@ -233,6 +256,52 @@ Setting `maxSteps` too low may prevent complex tasks from completing. Setting it

---

## Control Multi-Step Loops with stopWhen

Use `stopWhen` to control whether the agent starts another step after the current step completes.

```typescript
import { hasToolCall } from "@mcpjam/sdk";

// Stop after the step where "search_tasks" is called
const result = await agent.prompt("Find my open tasks", {
stopWhen: hasToolCall("search_tasks"),
});

expect(result.hasToolCall("search_tasks")).toBe(true);

// Stop after any of multiple conditions
const result2 = await agent.prompt("Do something", {
stopWhen: [hasToolCall("tool_a"), hasToolCall("tool_b")],
});
```

<Info>
`stopWhen` does not skip tool execution. It controls whether the prompt loop continues after the current step completes, and `TestAgent` also applies `stepCountIs(maxSteps)` as a safety guard.
</Info>

---

## Bound Prompt Runtime with timeout

Use `timeout` when you want to bound how long `TestAgent.prompt()` can run:

```typescript
const result = await agent.prompt("Run a long workflow", {
timeout: 10_000,
});

const result2 = await agent.prompt("Run a long workflow", {
timeout: { totalMs: 10_000, stepMs: 2_500, chunkMs: 1_000 },
});
```

<Info>
`chunkMs` is accepted for parity, but it mainly applies to streaming APIs. For `TestAgent.prompt()`, focus on `number`, `totalMs`, and `stepMs`.
</Info>
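
The two accepted shapes can be reduced to one. This sketch assumes the documented fields only; it is an illustration of the option surface, not the SDK's internal implementation:

```typescript
// Normalize the two documented timeout forms into a single object.
// A bare number is shorthand for a total cap on the whole prompt.
type TimeoutInput =
  | number
  | { totalMs?: number; stepMs?: number; chunkMs?: number };

function normalizeTimeout(timeout: TimeoutInput): {
  totalMs?: number;
  stepMs?: number;
  chunkMs?: number;
} {
  return typeof timeout === "number" ? { totalMs: timeout } : timeout;
}
```
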

---

## Complete Example

```typescript
20 changes: 19 additions & 1 deletion sdk/README.md
@@ -163,7 +163,7 @@ await manager.connectToServer("asana", {
},
});

// Get tools for AI SDK integration
// Get tools for TestAgent
const tools = await manager.getToolsForAiSdk(["everything", "asana"]);

// Direct MCP operations
@@ -187,6 +187,8 @@
Runs LLM prompts with MCP tool access.

```ts
import { hasToolCall } from "@mcpjam/sdk";

const agent = new TestAgent({
tools: await manager.getToolsForAiSdk(),
model: "openai/gpt-4o", // provider/model format
@@ -202,8 +204,24 @@ const result = await agent.prompt("Add 2 and 3");
// Multi-turn with context
const r1 = await agent.prompt("Who am I?");
const r2 = await agent.prompt("List my projects", { context: [r1] });

// Stop the loop after the step where a tool is called
const r3 = await agent.prompt("Search tasks", {
stopWhen: hasToolCall("search_tasks"),
});
r3.hasToolCall("search_tasks"); // true

// Bound prompt runtime
const r4 = await agent.prompt("Run a long workflow", {
timeout: { totalMs: 10_000, stepMs: 2_500 },
});
r4.hasError(); // true if the prompt timed out
```

`stopWhen` does not skip tool execution. It controls whether the prompt loop continues after the current step completes, and `TestAgent` also applies `stepCountIs(maxSteps)` as a safety guard.

`timeout` bounds prompt runtime. `number` and `totalMs` cap the full prompt, `stepMs` caps each step, and `chunkMs` is accepted for parity but mainly matters in streaming flows. The runtime creates an internal abort signal, so tools can stop early if their implementation respects the provided `abortSignal`.

**Supported providers:** `openai`, `anthropic`, `azure`, `google`, `mistral`, `deepseek`, `ollama`, `openrouter`, `xai`

</details>
20 changes: 18 additions & 2 deletions sdk/skills/create-mcp-eval/SKILL.md
@@ -172,7 +172,7 @@ await manager.connectToServer("server-id", {
env: { API_KEY: "..." },
});

// Get AI SDK-compatible tools for TestAgent
// Get tools for TestAgent
const tools = await manager.getToolsForAiSdk(["server-id"]);

// Cleanup
@@ -185,6 +185,7 @@

```typescript
import { TestAgent } from "@mcpjam/sdk";
import { hasToolCall } from "@mcpjam/sdk";

const agent = new TestAgent({
tools, // from manager.getToolsForAiSdk()
@@ -200,6 +201,18 @@ const result = await agent.prompt("List all projects");
const r1 = await agent.prompt("Get my user profile");
const r2 = await agent.prompt("List workspaces for that user", { context: r1 });

// Stop the loop after the step where a tool is called
const r3 = await agent.prompt("Search tasks", {
stopWhen: hasToolCall("search_tasks"),
});
r3.hasToolCall("search_tasks"); // true

// Bound prompt runtime
const r4 = await agent.prompt("Run a long workflow", {
timeout: { totalMs: 10_000, stepMs: 2_500 },
});
r4.hasError(); // true if the prompt timed out

// Mock agent for deterministic tests (no LLM needed)
const mockAgent = TestAgent.mock(async (message) =>
PromptResult.from({
@@ -216,6 +229,10 @@ const mockAgent = TestAgent.mock(async (message) =>
);
```

`stopWhen` does not skip tool execution. It controls whether the prompt loop continues after the current step completes, and `TestAgent` also applies `stepCountIs(maxSteps)` as a safety guard.

`timeout` bounds prompt runtime. `number` and `totalMs` cap the full prompt, `stepMs` caps each step, and `chunkMs` is accepted for parity but mainly matters in streaming flows. The runtime creates an internal abort signal, so tools can stop early if their implementation respects the provided `abortSignal`.

### PromptResult — Inspect Agent Responses

```typescript
@@ -998,4 +1015,3 @@
expect(result.hasToolCall("search_tasks")).toBe(true);
}, 90_000);
```

40 changes: 40 additions & 0 deletions sdk/src/EvalAgent.ts
@@ -1,3 +1,4 @@
import type { StopCondition, TimeoutConfiguration, ToolSet } from "ai";
import type { PromptResult } from "./PromptResult.js";

/**
@@ -6,6 +7,45 @@ import type { PromptResult } from "./PromptResult.js";
export interface PromptOptions {
/** Previous PromptResult(s) to include as conversation context for multi-turn conversations */
context?: PromptResult | PromptResult[];

/**
* Additional stop conditions for the agentic loop.
* Evaluated after each step completes (tools execute normally).
* `stepCountIs(maxSteps)` is always applied as a safety guard
* in addition to any conditions provided here.
*
* Import helpers like `hasToolCall` and `stepCountIs` from `"@mcpjam/sdk"`.
*
* @example
* ```typescript
* import { hasToolCall } from "@mcpjam/sdk";
*
* // Stop the loop after the step where "search_tasks" is called
* const result = await agent.prompt("Find my tasks", {
* stopWhen: hasToolCall("search_tasks"),
* });
* expect(result.hasToolCall("search_tasks")).toBe(true);
*
* // Multiple conditions (any one being true stops the loop)
* const result = await agent.prompt("Do something", {
* stopWhen: [hasToolCall("tool_a"), hasToolCall("tool_b")],
* });
* ```
*/
stopWhen?: StopCondition<ToolSet> | Array<StopCondition<ToolSet>>;

/**
* Timeout for the prompt runtime.
*
* - `number`: total timeout for the entire prompt call in milliseconds
* - `{ totalMs }`: total timeout across all steps
* - `{ stepMs }`: timeout for each generation step
* - `{ chunkMs }`: accepted for parity and primarily relevant to streaming APIs
*
* The runtime creates an internal abort signal. Tools can stop early if they
* respect the `abortSignal` passed to `execute()`.
*/
timeout?: TimeoutConfiguration;
}

/**
31 changes: 25 additions & 6 deletions sdk/src/TestAgent.ts
@@ -3,7 +3,7 @@
*/

import { generateText, stepCountIs, dynamicTool, jsonSchema } from "ai";
import type { ToolSet, ModelMessage, UserModelMessage } from "ai";
import type { StopCondition, ToolSet, ModelMessage, UserModelMessage } from "ai";
import { CallToolResultSchema } from "@modelcontextprotocol/sdk/types.js";
import { createModelFromString, parseLLMString } from "./model-factory.js";
import type { CreateModelOptions } from "./model-factory.js";
@@ -318,6 +318,19 @@ export class TestAgent implements EvalAgent {
return instrumented;
}

private resolveStopWhen(
stopWhen?: PromptOptions["stopWhen"]
): Array<StopCondition<ToolSet>> {
const base = [stepCountIs(this.maxSteps)];

if (stopWhen == null) {
return base;
}

const conditions = Array.isArray(stopWhen) ? stopWhen : [stopWhen];
return [...base, ...conditions];
}

/**
* Build an array of ModelMessages from previous PromptResult(s) for multi-turn context.
* @param context - Single PromptResult or array of PromptResults to include as context
@@ -383,10 +396,13 @@
const model = createModelFromString(this.model, modelOptions);

// Instrument tools to track MCP execution time
const instrumentedTools = this.createInstrumentedTools((ms) => {
totalMcpMs += ms;
stepMcpMs += ms; // Accumulate per-step for LLM calculation
}, widgetSnapshots);
const instrumentedTools = this.createInstrumentedTools(
(ms) => {
totalMcpMs += ms;
stepMcpMs += ms; // Accumulate per-step for LLM calculation
},
widgetSnapshots
);

// Build messages array if context is provided for multi-turn
const contextMessages = this.buildContextMessages(options?.context);
@@ -405,9 +421,12 @@
...(this.temperature !== undefined && {
temperature: this.temperature,
}),
...(options?.timeout !== undefined && {
timeout: options.timeout,
}),
// Use stopWhen with stepCountIs for controlling max agentic steps
// AI SDK v6+ uses this instead of maxSteps
stopWhen: stepCountIs(this.maxSteps),
stopWhen: this.resolveStopWhen(options?.stopWhen),
onStepFinish: () => {
const now = Date.now();
const stepDuration = now - lastStepEndTime;