Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 127 additions & 8 deletions scripts/build-wasm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,100 @@ const grammarsDir = resolve(root, 'grammars');

if (!existsSync(grammarsDir)) mkdirSync(grammarsDir);

type PreflightFailure = {
reason: string;
remediation: string[];
};

function printBanner(title: string, lines: string[]): void {
const bar = '─'.repeat(Math.max(title.length + 4, 60));
console.warn(bar);
console.warn(` ${title}`);
console.warn(bar);
for (const l of lines) console.warn(l);
console.warn(bar);
}

// With shell: true, execFileSync concatenates cmd + args into a single string
// and hands it to the shell, so any whitespace in args (e.g. Windows paths like
// `C:\Users\First Last\...`) gets re-split as separate tokens. Quote args that
// contain whitespace so the shell treats them as one argument.
function quoteShellArg(arg: string): string {
if (arg.length === 0) return '""';
if (!/\s/.test(arg)) return arg;
if (process.platform === 'win32') {
// cmd.exe: wrap in double quotes; escape any embedded double quotes.
return `"${arg.replace(/"/g, '""')}"`;
}
// POSIX sh: wrap in single quotes; close/escape/reopen for embedded single quotes.
return `'${arg.replace(/'/g, `'\\''`)}'`;
}

function runCaptured(
cmd: string,
args: string[],
cwd: string,
): { ok: true; stdout: string } | { ok: false; stdout: string; stderr: string; message: string } {
try {
const stdout = execFileSync(cmd, args.map(quoteShellArg), {
cwd,
stdio: ['ignore', 'pipe', 'pipe'],
shell: true,
encoding: 'utf8',
});
Comment on lines +59 to +65
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 shell: true with array args may mishandle paths with spaces

execFileSync with shell: true passes the command to a shell, which joins cmd and args into a single string. Any grammarDir path containing spaces (common on Windows under C:\Users\First Last\...) will be split by the shell, causing a "no such directory" error rather than the expected WASM-toolchain or ENOENT message. This also existed in the original code but is now centralised in runCaptured; wrapping path args in quotes when shell: true is set would harden both the preflight and the per-grammar builds.

If shell: true is required for npx resolution on Windows, consider using spawnSync with shell: true and quoting the path argument explicitly instead.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 0820688. Added a quoteShellArg helper that wraps whitespace-containing args in platform-appropriate quotes (cmd.exe double quotes on Windows, POSIX single quotes elsewhere) before handing them to the shell. runCaptured now maps every arg through it, so grammarDir paths under C:\Users\First Last\... won't be re-split by the shell.

return { ok: true, stdout };
} catch (err: any) {
return {
ok: false,
stdout: String(err.stdout ?? ''),
stderr: String(err.stderr ?? ''),
message: String(err.message ?? 'unknown error'),
};
}
}

function preflightTreeSitterCli(): PreflightFailure | null {
const result = runCaptured('npx', ['tree-sitter', '--version'], root);
if (result.ok) return null;

const combined = `${result.stderr}\n${result.stdout}\n${result.message}`;
const missingBinary =
/ENOENT.*tree-sitter(\.exe)?/i.test(combined) ||
/tree-sitter(\.exe)? was not found/i.test(combined) ||
/spawn .*tree-sitter(\.exe)?.*ENOENT/i.test(combined);

if (missingBinary) {
return {
reason: "tree-sitter CLI binary is missing — can't build WASM grammars",
remediation: [
'The tree-sitter-cli npm package downloads a platform-specific binary',
'at install time (see node_modules/tree-sitter-cli/install.js). That',
'download appears to have failed or been skipped on this machine,',
'likely due to a network/proxy block or a previous --ignore-scripts install.',
'',
'Remediation — try one of:',
' 1. npm rebuild tree-sitter-cli (re-run the binary download)',
' 2. npm install -g tree-sitter-cli (install globally from PATH)',
' 3. Download tree-sitter from',
' https://github.com/tree-sitter/tree-sitter/releases',
' and place it on PATH',
'',
'Note: the native Rust engine handles parsing on its own. WASM grammars',
'are only needed when running with --engine wasm or on platforms without',
'a prebuilt native addon.',
],
};
}

return {
reason: `tree-sitter CLI is not runnable: ${result.message.trim()}`,
remediation: [
'Inspect node_modules/tree-sitter-cli/ to confirm the package installed cleanly.',
result.stderr.trim() ? `stderr: ${result.stderr.trim().split('\n').slice(0, 3).join(' | ')}` : '',
].filter(Boolean),
};
}

// Allowed WASM imports — pure C runtime / memory primitives only (no I/O, no syscalls)
const ALLOWED_WASM_IMPORTS = new Set([
'env.memory',
Expand Down Expand Up @@ -124,8 +218,16 @@ const grammars = [
{ name: 'tree-sitter-verilog', pkg: 'tree-sitter-verilog', sub: null },
];

const preflight = preflightTreeSitterCli();
if (preflight) {
printBanner(`WASM build skipped: ${preflight.reason}`, preflight.remediation);
console.warn(`\nSkipped building ${grammars.length} grammars. Exiting cleanly (non-fatal — native engine available).`);
process.exit(0);
}

let failed = 0;
let rejected = 0;
let missingToolchain = false;

for (const g of grammars) {
let pkgDir: string;
Expand All @@ -139,15 +241,20 @@ for (const g of grammars) {
const grammarDir = g.sub ? resolve(pkgDir, g.sub) : pkgDir;

console.log(`Building ${g.name}.wasm from ${grammarDir}...`);
try {
execFileSync('npx', ['tree-sitter', 'build', '--wasm', grammarDir], {
cwd: grammarsDir,
stdio: 'inherit',
shell: true,
});
} catch (err: any) {
const build = runCaptured('npx', ['tree-sitter', 'build', '--wasm', grammarDir], grammarsDir);
if (!build.ok) {
failed++;
console.warn(` WARN: Failed to build ${g.name}.wasm — ${err.message ?? 'unknown error'}`);
// Include build.message — Node.js surfaces ENOENT for spawned executables
// (e.g. `spawn emcc ENOENT`) via the error message, not stderr.
const combined = `${build.stderr}\n${build.stdout}\n${build.message}`;
if (
/emcc|emscripten|docker/i.test(combined) &&
/not found|no such|cannot find|missing|ENOENT/i.test(combined)
) {
missingToolchain = true;
}
const detail = build.stderr.trim().split('\n').slice(-2).join(' | ') || build.message;
console.warn(` WARN: Failed to build ${g.name}.wasm — ${detail}`);
continue;
}

Expand All @@ -173,6 +280,18 @@ for (const g of grammars) {
const total = failed + rejected;
if (total > 0) {
console.warn(`\n${failed} build failures, ${rejected} validation rejections out of ${grammars.length} grammars (non-fatal — native engine available)`);
if (missingToolchain) {
printBanner('WASM toolchain missing', [
"tree-sitter's `build --wasm` needs either Docker or Emscripten (emcc) to",
'compile grammars from C to WebAssembly. Neither appears to be available.',
'',
'Remediation — install one of:',
' - Docker Desktop: https://www.docker.com/products/docker-desktop',
' - Emscripten SDK: https://emscripten.org/docs/getting_started/downloads.html',
'',
'Then re-run: npm run build:wasm',
]);
}
if (rejected > 0) {
console.error('SECURITY: Some grammars were rejected — inspect the source packages before retrying.');
}
Expand Down
Loading