Skip to content

Commit 7fe9486

Browse files
authored
🤖 feat: add web_fetch tool for fetching web pages as markdown (#742)
Adds a new `web_fetch` tool that:

- Fetches web pages using curl via the Runtime (respects workspace network context)
- Extracts main content using Mozilla Readability
- Converts to clean markdown using Turndown
- Supports both markdown and plain text output formats

## Features

- **Network isolation:** requests originate from workspace, not Mux host
- **Robust HTTP handling:** curl handles redirects, SSL, encoding, compression natively
- **Size limits:** output truncated to 64KB, HTML input limited to 5MB
- **Graceful errors:** DNS failures, timeouts, empty responses handled cleanly

## Dependencies added

- `@mozilla/readability`: article extraction
- `jsdom`: DOM parsing for Readability
- `turndown`: HTML to markdown conversion

## Testing

9 integration tests using real runtime (no mocks):

- Real network calls to example.com
- Local file:// URLs for HTML parsing tests
- Error scenarios (DNS failure, connection refused, empty files)

Also updates AGENTS.md testing section to clarify preferred test types:

1. True integration tests (no mocks)
2. Unit tests on pure/isolated logic

_Generated with `mux`_
1 parent ab5a724 commit 7fe9486

File tree

13 files changed

+712
-9
lines changed

13 files changed

+712
-9
lines changed

bun.lock

Lines changed: 110 additions & 9 deletions
Large diffs are not rendered by default.

docs/AGENTS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ gh pr view <number> --json mergeable,mergeStateStatus | jq '.'
5757

5858
## Testing Doctrine
5959

60+
Two types of tests are preferred:
61+
62+
1. **True integration tests** — use real runtimes, real filesystems, real network calls. No mocks, stubs, or fakes. These prove the system works end-to-end.
63+
2. **Unit tests on pure/isolated logic** — test pure functions or well-isolated modules where inputs and outputs are clear. No mocks needed because the code has no external dependencies.
64+
65+
Avoid mock-heavy tests that verify implementation details rather than behavior. If you need mocks to test something, consider whether the code should be restructured to be more testable.
66+
6067
### Storybook
6168

6269
- Prefer full-app stories (`App.stories.tsx`) to isolated components.

jest.config.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ module.exports = {
1414
moduleNameMapper: {
1515
"^@/(.*)$": "<rootDir>/src/$1",
1616
"^chalk$": "<rootDir>/tests/__mocks__/chalk.js",
17+
"^jsdom$": "<rootDir>/tests/__mocks__/jsdom.js",
1718
},
1819
transform: {
1920
"^.+\\.tsx?$": [

package.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
"@ai-sdk/openai": "^2.0.72",
5151
"@ai-sdk/xai": "^2.0.36",
5252
"@lydell/node-pty": "1.1.0",
53+
"@mozilla/readability": "^0.6.0",
5354
"@openrouter/ai-sdk-provider": "^1.2.5",
5455
"@radix-ui/react-checkbox": "^1.3.3",
5556
"@radix-ui/react-dialog": "^1.1.15",
@@ -72,6 +73,7 @@
7273
"electron-updater": "^6.6.2",
7374
"express": "^5.1.0",
7475
"ghostty-web": "0.2.1",
76+
"jsdom": "^27.2.0",
7577
"jsonc-parser": "^3.3.1",
7678
"lru-cache": "^11.2.2",
7779
"lucide-react": "^0.553.0",
@@ -83,6 +85,7 @@
8385
"shescape": "^2.1.6",
8486
"source-map-support": "^0.5.21",
8587
"streamdown": "^1.4.0",
88+
"turndown": "^7.2.2",
8689
"undici": "^7.16.0",
8790
"write-file-atomic": "^6.0.0",
8891
"ws": "^8.18.3",
@@ -106,11 +109,13 @@
106109
"@types/escape-html": "^1.0.4",
107110
"@types/express": "^5.0.3",
108111
"@types/jest": "^30.0.0",
112+
"@types/jsdom": "^27.0.0",
109113
"@types/katex": "^0.16.7",
110114
"@types/markdown-it": "^14.1.2",
111115
"@types/minimist": "^1.2.5",
112116
"@types/react": "^18.2.0",
113117
"@types/react-dom": "^18.2.0",
118+
"@types/turndown": "^5.0.6",
114119
"@types/write-file-atomic": "^4.0.3",
115120
"@types/ws": "^8.18.1",
116121
"@typescript-eslint/eslint-plugin": "^8.44.1",

src/browser/components/Messages/ToolMessage.tsx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { FileReadToolCall } from "../tools/FileReadToolCall";
88
import { ProposePlanToolCall } from "../tools/ProposePlanToolCall";
99
import { TodoToolCall } from "../tools/TodoToolCall";
1010
import { StatusSetToolCall } from "../tools/StatusSetToolCall";
11+
import { WebFetchToolCall } from "../tools/WebFetchToolCall";
1112
import type {
1213
BashToolArgs,
1314
BashToolResult,
@@ -25,6 +26,8 @@ import type {
2526
TodoWriteToolResult,
2627
StatusSetToolArgs,
2728
StatusSetToolResult,
29+
WebFetchToolArgs,
30+
WebFetchToolResult,
2831
} from "@/common/types/tools";
2932

3033
interface ToolMessageProps {
@@ -81,6 +84,11 @@ function isStatusSetTool(toolName: string, args: unknown): args is StatusSetTool
8184
return TOOL_DEFINITIONS.status_set.schema.safeParse(args).success;
8285
}
8386

87+
/**
 * Type guard for `web_fetch` tool calls.
 *
 * Passes only when the tool name is exactly "web_fetch" and `args` validates
 * against the schema registered under TOOL_DEFINITIONS.web_fetch.
 */
function isWebFetchTool(toolName: string, args: unknown): args is WebFetchToolArgs {
  // Short-circuits on the name so the schema is never run for other tools.
  return toolName === "web_fetch" && TOOL_DEFINITIONS.web_fetch.schema.safeParse(args).success;
}
91+
8492
export const ToolMessage: React.FC<ToolMessageProps> = ({ message, className, workspaceId }) => {
8593
// Route to specialized components based on tool name
8694
if (isBashTool(message.toolName, message.args)) {
@@ -184,6 +192,18 @@ export const ToolMessage: React.FC<ToolMessageProps> = ({ message, className, wo
184192
);
185193
}
186194

195+
if (isWebFetchTool(message.toolName, message.args)) {
196+
return (
197+
<div className={className}>
198+
<WebFetchToolCall
199+
args={message.args}
200+
result={message.result as WebFetchToolResult | undefined}
201+
status={message.status}
202+
/>
203+
</div>
204+
);
205+
}
206+
187207
// Fallback to generic tool call
188208
return (
189209
<div className={className}>
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import React from "react";
2+
import type { WebFetchToolArgs, WebFetchToolResult } from "@/common/types/tools";
3+
import {
4+
ToolContainer,
5+
ToolHeader,
6+
ExpandIcon,
7+
StatusIndicator,
8+
ToolDetails,
9+
DetailSection,
10+
DetailLabel,
11+
LoadingDots,
12+
} from "./shared/ToolPrimitives";
13+
import { useToolExpansion, getStatusDisplay, type ToolStatus } from "./shared/toolUtils";
14+
import { TooltipWrapper, Tooltip } from "../Tooltip";
15+
import { MarkdownRenderer } from "../Messages/MarkdownRenderer";
16+
17+
/** Props accepted by the WebFetchToolCall component. */
interface WebFetchToolCallProps {
18+
// Arguments of the web_fetch call; the component reads `args.url` for the header label and the URL row.
args: WebFetchToolArgs;
19+
// Tool result, if one is available yet; the component renders the error or the content depending on `success`.
result?: WebFetchToolResult;
20+
// Execution status shown in the header; the component defaults it to "pending" when omitted.
status?: ToolStatus;
21+
}
22+
23+
/**
 * Format a byte count as a compact human-readable string (B, KB or MB).
 *
 * Units are binary (1KB = 1024B); KB and MB values are shown with one decimal.
 *
 * @param bytes - Size to format.
 * @returns The formatted size. Negative or non-finite input yields "0B" so a
 *   malformed result never renders as "NaNMB" or a negative size.
 */
function formatBytes(bytes: number): string {
  const KB = 1024;
  const MB = KB * KB;

  if (!Number.isFinite(bytes) || bytes < 0) return "0B";
  if (bytes < KB) return `${bytes}B`;

  const kilobytes = (bytes / KB).toFixed(1);
  // toFixed rounds, so sizes just under 1MB would otherwise print as "1024.0KB";
  // promote those to the MB branch instead.
  if (bytes < MB && kilobytes !== "1024.0") return `${kilobytes}KB`;

  return `${(bytes / MB).toFixed(1)}MB`;
}
28+
29+
/**
 * Derive a short label for a URL, used for compact display in the tool header.
 *
 * @param url - The URL string passed to the tool.
 * @returns The hostname when the URL parses and has one. URLs without a host
 *   (for example `file:///tmp/page.html`) and strings that fail to parse fall
 *   back to the original input so the label is never blank.
 */
function getDomain(url: string): string {
  try {
    // `||` rather than `??` is deliberate: hostname is "" (not null) for host-less URLs.
    return new URL(url).hostname || url;
  } catch {
    return url;
  }
}
40+
41+
/**
 * True when `url` parses and uses the http: or https: scheme.
 *
 * The tool schema validates with a generic URL check, which accepts other
 * schemes too, so this is used to decide whether the URL may be rendered as a
 * clickable link.
 */
function isHttpUrl(url: string): boolean {
  try {
    const { protocol } = new URL(url);
    return protocol === "http:" || protocol === "https:";
  } catch {
    return false;
  }
}

/**
 * Renders a `web_fetch` tool call as a collapsible row.
 *
 * Collapsed: shows the target domain, the fetched size (on success) and the
 * status indicator. Expanded: shows the full URL, the page title (if any), and
 * either the error message, the fetched content rendered as markdown, or a
 * loading indicator while the call is executing without a result.
 *
 * @param args - Tool arguments; `args.url` is displayed and, for http(s) URLs, linked.
 * @param result - Tool result, if available.
 * @param status - Execution status; defaults to "pending".
 */
export const WebFetchToolCall: React.FC<WebFetchToolCallProps> = ({
  args,
  result,
  status = "pending",
}) => {
  const { expanded, toggleExpanded } = useToolExpansion();

  const domain = getDomain(args.url);
  // Only http(s) URLs become anchors; other schemes (javascript:, file:, data:, ...)
  // are shown as inert text so a tool argument can never produce an unsafe link.
  const linkable = isHttpUrl(args.url);

  return (
    <ToolContainer expanded={expanded} className="@container">
      <ToolHeader onClick={toggleExpanded}>
        <ExpandIcon expanded={expanded}></ExpandIcon>
        <TooltipWrapper inline>
          <span>🌐</span>
          <Tooltip>web_fetch</Tooltip>
        </TooltipWrapper>
        <div className="text-text flex max-w-96 min-w-0 items-center gap-1.5">
          <span className="font-monospace truncate">{domain}</span>
        </div>
        {result && result.success && (
          <span className="text-secondary ml-2 text-[10px] whitespace-nowrap">
            <span className="hidden @sm:inline">fetched </span>
            {formatBytes(result.length)}
          </span>
        )}
        <StatusIndicator status={status}>{getStatusDisplay(status)}</StatusIndicator>
      </ToolHeader>

      {expanded && (
        <ToolDetails>
          <DetailSection>
            <div className="bg-code-bg flex flex-wrap gap-4 rounded px-2 py-1.5 text-[11px] leading-[1.4]">
              <div className="flex min-w-0 gap-1.5">
                <span className="text-secondary font-medium">URL:</span>
                {linkable ? (
                  <a
                    href={args.url}
                    target="_blank"
                    rel="noopener noreferrer"
                    className="text-link font-monospace truncate hover:underline"
                  >
                    {args.url}
                  </a>
                ) : (
                  <span className="text-text font-monospace truncate">{args.url}</span>
                )}
              </div>
              {result && result.success && result.title && (
                <div className="flex min-w-0 gap-1.5">
                  <span className="text-secondary font-medium">Title:</span>
                  <span className="text-text truncate">{result.title}</span>
                </div>
              )}
            </div>
          </DetailSection>

          {result && (
            <>
              {result.success === false && result.error && (
                <DetailSection>
                  <DetailLabel>Error</DetailLabel>
                  <div className="text-danger bg-danger-overlay border-danger rounded border-l-2 px-2 py-1.5 text-[11px]">
                    {result.error}
                  </div>
                </DetailSection>
              )}

              {result.success && result.content && (
                <DetailSection>
                  <DetailLabel>Content</DetailLabel>
                  <div className="bg-code-bg max-h-[300px] overflow-y-auto rounded px-3 py-2 text-[12px]">
                    <MarkdownRenderer content={result.content} />
                  </div>
                </DetailSection>
              )}
            </>
          )}

          {status === "executing" && !result && (
            <DetailSection>
              <div className="text-secondary text-[11px]">
                Fetching page
                <LoadingDots />
              </div>
            </DetailSection>
          )}
        </ToolDetails>
      )}
    </ToolContainer>
  );
};

src/common/constants/toolLimits.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,8 @@ export const BASH_MAX_LINE_BYTES = 1024; // 1KB per line for AI agent
1717
export const MAX_TODOS = 7; // Maximum number of TODO items in a list
1818

1919
export const STATUS_MESSAGE_MAX_LENGTH = 60; // Maximum length for status messages (auto-truncated)
20+
21+
// Web fetch tool limits
22+
// Timeout applied to the curl invocation, expressed in seconds.
export const WEB_FETCH_TIMEOUT_SECS = 15; // curl timeout
23+
// Cap on the markdown output, in bytes; the web_fetch tool description reports this value in KB as the truncation size.
export const WEB_FETCH_MAX_OUTPUT_BYTES = 64 * 1024; // 64KB markdown output
24+
// Cap on the downloaded HTML, in bytes.
export const WEB_FETCH_MAX_HTML_BYTES = 5 * 1024 * 1024; // 5MB HTML input (curl --max-filesize)

src/common/types/tools.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,22 @@ export type StatusSetToolResult =
201201
success: false;
202202
error: string;
203203
};
204+
205+
// Web Fetch Tool Types
206+
/** Arguments accepted by the web_fetch tool. */
export interface WebFetchToolArgs {
207+
// URL of the page to fetch.
url: string;
208+
}
209+
210+
/**
 * Result of a web_fetch call, discriminated on `success`:
 * the `true` variant carries the extracted page, the `false` variant carries an error message.
 */
export type WebFetchToolResult =
211+
| {
212+
success: true;
213+
// Page title; the tool-call UI only shows it when non-empty.
title: string;
214+
// Extracted page content; the tool-call UI renders it as markdown.
content: string;
215+
url: string;
216+
byline?: string;
217+
// NOTE(review): the tool-call UI formats this as a byte count — confirm it is bytes rather than characters.
length: number;
218+
}
219+
| {
220+
success: false;
221+
// Human-readable failure description shown in the tool-call UI.
error: string;
222+
};

src/common/utils/tools/toolDefinitions.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
BASH_MAX_LINE_BYTES,
1313
BASH_MAX_TOTAL_BYTES,
1414
STATUS_MESSAGE_MAX_LENGTH,
15+
WEB_FETCH_MAX_OUTPUT_BYTES,
1516
} from "@/common/constants/toolLimits";
1617
import { TOOL_EDIT_WARNING } from "@/common/types/tools";
1718

@@ -228,6 +229,16 @@ export const TOOL_DEFINITIONS = {
228229
})
229230
.strict(),
230231
},
232+
web_fetch: {
233+
description:
234+
`Fetch a web page and extract its main content as clean markdown. ` +
235+
`Uses the workspace's network context (requests originate from the workspace, not Mux host). ` +
236+
`Requires curl to be installed in the workspace. ` +
237+
`Output is truncated to ${Math.floor(WEB_FETCH_MAX_OUTPUT_BYTES / 1024)}KB.`,
238+
schema: z.object({
239+
url: z.string().url().describe("The URL to fetch (http or https)"),
240+
}),
241+
},
231242
} as const;
232243

233244
/**
@@ -268,6 +279,7 @@ export function getAvailableTools(modelString: string): string[] {
268279
"todo_write",
269280
"todo_read",
270281
"status_set",
282+
"web_fetch",
271283
];
272284

273285
// Add provider-specific tools

src/common/utils/tools/tools.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ export async function getToolsForModel(
8484
const wrap = <TParameters, TResult>(tool: Tool<TParameters, TResult>) =>
8585
wrapWithInitWait(tool, workspaceId, initStateManager);
8686

87+
// Lazy-load web_fetch to avoid loading jsdom (ESM-only) at Jest setup time
88+
// This allows integration tests to run without transforming jsdom's dependencies
89+
const { createWebFetchTool } = await import("@/node/services/tools/web_fetch");
90+
8791
// Runtime-dependent tools need to wait for workspace initialization
8892
// Wrap them to handle init waiting centrally instead of in each tool
8993
const runtimeTools: Record<string, Tool> = {
@@ -95,6 +99,7 @@ export async function getToolsForModel(
9599
// and line number miscalculations. Use file_edit_replace_string instead.
96100
// file_edit_replace_lines: wrap(createFileEditReplaceLinesTool(config)),
97101
bash: wrap(createBashTool(config)),
102+
web_fetch: wrap(createWebFetchTool(config)),
98103
};
99104

100105
// Non-runtime tools execute immediately (no init wait needed)

0 commit comments

Comments
 (0)