Skip to content

Commit 02f4635

Browse files
authored
Fix node:sqlite V8 crash on invalid UTF-8 in text columns (#272)
node:sqlite calls v8::String::NewFromUtf8 with kAbort when reading TEXT columns. Cursor chat blobs often contain multi-byte characters truncated at streaming boundaries, and that invalid UTF-8 triggers a V8 CHECK abort (a process crash, not a catchable JS exception). Select all text-content columns as CAST(col AS BLOB) so node:sqlite returns a Uint8Array instead, then decode in JS with a TextDecoder configured with fatal: false, which replaces invalid bytes with U+FFFD. Covers all three SQLite providers (Cursor, Goose, OpenCode). Removes the version blocklist (MIN_NODE_22_PATCH) and lowers the engines requirement from >=22.20 to >=22, since the BLOB cast approach works on all Node 22.x versions. Closes #264. Closes #250.
1 parent d142bd9 commit 02f4635

6 files changed

Lines changed: 102 additions & 77 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
"developer-tools"
3131
],
3232
"engines": {
33-
"node": ">=22.20"
33+
"node": ">=22"
3434
},
3535
"author": "AgentSeal <hello@agentseal.org>",
3636
"license": "MIT",

src/providers/cursor.ts

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { homedir } from 'os'
44

55
import { calculateCost } from '../models.js'
66
import { readCachedResults, writeCachedResults } from '../cursor-cache.js'
7-
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
7+
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
88
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
99

1010
const CURSOR_COST_MODEL = 'claude-sonnet-4-5'
@@ -33,16 +33,16 @@ type BubbleRow = {
3333
model: string | null
3434
created_at: string | null
3535
conversation_id: string | null
36-
user_text: string | null
36+
user_text: Uint8Array | string | null
3737
text_length: number | null
3838
bubble_type: number | null
39-
code_blocks: string | null
39+
code_blocks: Uint8Array | string | null
4040
}
4141

4242
type AgentKvRow = {
4343
key: string
4444
role: string | null
45-
content: string | null
45+
content: Uint8Array | string | null
4646
request_id: string | null
4747
content_length: number
4848
}
@@ -291,10 +291,10 @@ const BUBBLE_QUERY_BASE = `
291291
json_extract(value, '$.modelInfo.modelName') as model,
292292
json_extract(value, '$.createdAt') as created_at,
293293
json_extract(value, '$.conversationId') as conversation_id,
294-
substr(json_extract(value, '$.text'), 1, 500) as user_text,
294+
CAST(substr(json_extract(value, '$.text'), 1, 500) AS BLOB) as user_text,
295295
length(json_extract(value, '$.text')) as text_length,
296296
json_extract(value, '$.type') as bubble_type,
297-
json_extract(value, '$.codeBlocks') as code_blocks
297+
CAST(json_extract(value, '$.codeBlocks') AS BLOB) as code_blocks
298298
FROM cursorDiskKV
299299
WHERE key LIKE 'bubbleId:%'
300300
`
@@ -303,7 +303,7 @@ const AGENTKV_QUERY = `
303303
SELECT
304304
key,
305305
json_extract(value, '$.role') as role,
306-
json_extract(value, '$.content') as content,
306+
CAST(json_extract(value, '$.content') AS BLOB) as content,
307307
json_extract(value, '$.providerOptions.cursor.requestId') as request_id,
308308
length(value) as content_length
309309
FROM cursorDiskKV
@@ -316,7 +316,7 @@ const USER_MESSAGES_QUERY = `
316316
SELECT
317317
json_extract(value, '$.conversationId') as conversation_id,
318318
json_extract(value, '$.createdAt') as created_at,
319-
substr(json_extract(value, '$.text'), 1, 500) as text
319+
CAST(substr(json_extract(value, '$.text'), 1, 500) AS BLOB) as text
320320
FROM cursorDiskKV
321321
WHERE key LIKE 'bubbleId:%'
322322
AND json_extract(value, '$.type') = 1
@@ -346,7 +346,7 @@ function validateSchema(db: SqliteDatabase): boolean {
346346
}
347347
}
348348

349-
type UserMsgRow = { conversation_id: string; created_at: string; text: string }
349+
type UserMsgRow = { conversation_id: string; created_at: string; text: Uint8Array | string }
350350

351351
/// Per-conversation user-message buffer. We pop messages in arrival order via
352352
/// the `pos` cursor — a previous implementation called Array.shift() which is
@@ -363,11 +363,12 @@ function buildUserMessageMap(db: SqliteDatabase, timeFloor: string): Map<string,
363363
const rows = db.query<UserMsgRow>(USER_MESSAGES_QUERY, [timeFloor])
364364
for (const row of rows) {
365365
if (!row.conversation_id || !row.text) continue
366+
const text = blobToText(row.text)
366367
const existing = map.get(row.conversation_id)
367368
if (existing) {
368-
existing.messages.push(row.text)
369+
existing.messages.push(text)
369370
} else {
370-
map.set(row.conversation_id, { messages: [row.text], pos: 0 })
371+
map.set(row.conversation_id, { messages: [text], pos: 0 })
371372
}
372373
}
373374
} catch {}
@@ -488,10 +489,10 @@ function parseBubbles(db: SqliteDatabase, seenKeys: Set<string>): { calls: Parse
488489

489490
const timestamp = createdAt || new Date().toISOString()
490491
const userQuestion = takeUserMessage(userMessages, conversationId)
491-
const assistantText = row.user_text ?? ''
492+
const assistantText = blobToText(row.user_text)
492493
const userText = (userQuestion + ' ' + assistantText).trim()
493494

494-
const languages = extractLanguages(row.code_blocks)
495+
const languages = extractLanguages(blobToText(row.code_blocks))
495496
const hasCode = languages.length > 0
496497

497498
const cursorTools: string[] = hasCode ? ['cursor:edit', ...languages.map(l => `lang:${l}`)] : []
@@ -572,20 +573,21 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set<string>, dbPath: string)
572573

573574
for (const row of rows) {
574575
if (!row.role || !row.content) continue
576+
const contentText = blobToText(row.content)
575577

576578
let content: AgentKvContent[]
577579
let plainTextLength = 0
578580
try {
579-
const parsed = JSON.parse(row.content)
581+
const parsed = JSON.parse(contentText)
580582
if (Array.isArray(parsed)) {
581583
content = parsed
582584
} else {
583585
content = []
584-
plainTextLength = row.content.length
586+
plainTextLength = contentText.length
585587
}
586588
} catch {
587589
content = []
588-
plainTextLength = row.content.length
590+
plainTextLength = contentText.length
589591
}
590592

591593
const requestId = row.request_id ?? currentRequestId
@@ -601,7 +603,7 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set<string>, dbPath: string)
601603
const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '' }
602604
existing.inputChars += textLength
603605
if (!existing.userText) {
604-
const text = content[0]?.text ?? row.content
606+
const text = content[0]?.text ?? contentText
605607
const queryMatch = text.match(/<user_query>([\s\S]*?)<\/user_query>/)
606608
existing.userText = queryMatch ? queryMatch[1].trim().slice(0, 500) : text.slice(0, 500)
607609
}

src/providers/goose.ts

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { homedir, platform } from 'os'
33

44
import { calculateCost, getShortModelName } from '../models.js'
55
import { extractBashCommands } from '../bash-utils.js'
6-
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
6+
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
77
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
88

99
type SessionRow = {
@@ -15,7 +15,7 @@ type SessionRow = {
1515
accumulated_input_tokens: number | null
1616
accumulated_output_tokens: number | null
1717
provider_name: string | null
18-
model_config_json: string | null
18+
model_config_json: Uint8Array | string | null
1919
}
2020

2121
type ModelConfig = {
@@ -26,7 +26,7 @@ type ModelConfig = {
2626
type MessageRow = {
2727
message_id: string
2828
role: string
29-
content_json: string
29+
content_json: Uint8Array | string
3030
created_timestamp: number
3131
}
3232

@@ -86,15 +86,15 @@ function extractToolsFromMessages(db: SqliteDatabase, sessionId: string): { tool
8686
const seen = new Set<string>()
8787

8888
try {
89-
const rows = db.query<{ content_json: string }>(
90-
"SELECT content_json FROM messages WHERE session_id = ? AND role = 'assistant' AND content_json LIKE '%toolRequest%'",
89+
const rows = db.query<{ content_json: Uint8Array | string }>(
90+
"SELECT CAST(content_json AS BLOB) AS content_json FROM messages WHERE session_id = ? AND role = 'assistant' AND content_json LIKE '%toolRequest%'",
9191
[sessionId],
9292
)
9393

9494
for (const row of rows) {
9595
let items: ContentItem[]
9696
try {
97-
items = JSON.parse(row.content_json) as ContentItem[]
97+
items = JSON.parse(blobToText(row.content_json)) as ContentItem[]
9898
} catch {
9999
continue
100100
}
@@ -124,12 +124,12 @@ function extractToolsFromMessages(db: SqliteDatabase, sessionId: string): { tool
124124

125125
function getFirstUserMessage(db: SqliteDatabase, sessionId: string): string {
126126
try {
127-
const rows = db.query<{ content_json: string }>(
128-
"SELECT content_json FROM messages WHERE session_id = ? AND role = 'user' ORDER BY created_timestamp ASC LIMIT 1",
127+
const rows = db.query<{ content_json: Uint8Array | string }>(
128+
"SELECT CAST(content_json AS BLOB) AS content_json FROM messages WHERE session_id = ? AND role = 'user' ORDER BY created_timestamp ASC LIMIT 1",
129129
[sessionId],
130130
)
131131
if (rows.length === 0) return ''
132-
const items = JSON.parse(rows[0]!.content_json) as ContentItem[]
132+
const items = JSON.parse(blobToText(rows[0]!.content_json)) as ContentItem[]
133133
const text = items.find(i => i.type === 'text') as { text?: string } | undefined
134134
return (text?.text ?? '').slice(0, 500)
135135
} catch {
@@ -161,7 +161,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
161161
if (!validateSchema(db)) return
162162

163163
const rows = db.query<SessionRow>(
164-
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, model_config_json FROM sessions WHERE id = ?',
164+
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, CAST(model_config_json AS BLOB) AS model_config_json FROM sessions WHERE id = ?',
165165
[sessionId],
166166
)
167167
if (rows.length === 0) return
@@ -175,7 +175,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
175175
if (seenKeys.has(dedupKey)) return
176176
seenKeys.add(dedupKey)
177177

178-
const config = parseModelConfig(session.model_config_json)
178+
const config = parseModelConfig(blobToText(session.model_config_json))
179179
const model = config.model_name ?? 'unknown'
180180
const costUSD = calculateCost(model, inputTokens, outputTokens, 0, 0, 0)
181181

@@ -223,7 +223,7 @@ async function discoverFromDb(dbPath: string): Promise<SessionSource[]> {
223223

224224
try {
225225
const rows = db.query<SessionRow>(
226-
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, model_config_json FROM sessions ORDER BY updated_at DESC',
226+
'SELECT id, name, working_dir, created_at, updated_at, accumulated_input_tokens, accumulated_output_tokens, provider_name, CAST(model_config_json AS BLOB) AS model_config_json FROM sessions ORDER BY updated_at DESC',
227227
)
228228

229229
return rows

src/providers/opencode.ts

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { homedir } from 'os'
44

55
import { calculateCost, getShortModelName } from '../models.js'
66
import { extractBashCommands } from '../bash-utils.js'
7-
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
7+
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
88
import type {
99
Provider,
1010
SessionSource,
@@ -15,18 +15,18 @@ import type {
1515
type MessageRow = {
1616
id: string
1717
time_created: number
18-
data: string
18+
data: Uint8Array | string
1919
}
2020

2121
type PartRow = {
2222
message_id: string
23-
data: string
23+
data: Uint8Array | string
2424
}
2525

2626
type SessionRow = {
2727
id: string
28-
directory: string
29-
title: string
28+
directory: Uint8Array | string
29+
title: Uint8Array | string
3030
time_created: number
3131
}
3232

@@ -169,19 +169,19 @@ function createParser(
169169
}
170170

171171
const messages = db.query<MessageRow>(
172-
'SELECT id, time_created, data FROM message WHERE session_id = ? ORDER BY time_created ASC',
172+
'SELECT id, time_created, CAST(data AS BLOB) AS data FROM message WHERE session_id = ? ORDER BY time_created ASC',
173173
[sessionId],
174174
)
175175

176176
const parts = db.query<PartRow>(
177-
'SELECT message_id, data FROM part WHERE session_id = ? ORDER BY message_id, id',
177+
'SELECT message_id, CAST(data AS BLOB) AS data FROM part WHERE session_id = ? ORDER BY message_id, id',
178178
[sessionId],
179179
)
180180

181181
const partsByMsg = new Map<string, PartData[]>()
182182
for (const part of parts) {
183183
try {
184-
const parsed = JSON.parse(part.data) as PartData
184+
const parsed = JSON.parse(blobToText(part.data)) as PartData
185185
const list = partsByMsg.get(part.message_id) ?? []
186186
list.push(parsed)
187187
partsByMsg.set(part.message_id, list)
@@ -195,7 +195,7 @@ function createParser(
195195
for (const msg of messages) {
196196
let data: MessageData
197197
try {
198-
data = JSON.parse(msg.data) as MessageData
198+
data = JSON.parse(blobToText(msg.data)) as MessageData
199199
} catch {
200200
continue
201201
}
@@ -294,14 +294,18 @@ async function discoverFromDb(dbPath: string): Promise<SessionSource[]> {
294294

295295
try {
296296
const rows = db.query<SessionRow>(
297-
'SELECT id, directory, title, time_created FROM session WHERE time_archived IS NULL AND parent_id IS NULL ORDER BY time_created DESC',
297+
'SELECT id, CAST(directory AS BLOB) AS directory, CAST(title AS BLOB) AS title, time_created FROM session WHERE time_archived IS NULL AND parent_id IS NULL ORDER BY time_created DESC',
298298
)
299299

300-
return rows.map((row) => ({
301-
path: `${dbPath}:${row.id}`,
302-
project: row.directory ? sanitize(row.directory) : sanitize(row.title),
303-
provider: 'opencode',
304-
}))
300+
return rows.map((row) => {
301+
const dir = blobToText(row.directory)
302+
const title = blobToText(row.title)
303+
return {
304+
path: `${dbPath}:${row.id}`,
305+
project: dir ? sanitize(dir) : sanitize(title),
306+
provider: 'opencode',
307+
}
308+
})
305309
} catch {
306310
return []
307311
} finally {

src/sqlite.ts

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,18 @@ let DatabaseSync: DatabaseSyncCtor | null = null
2323
let loadAttempted = false
2424
let loadError: string | null = null
2525

26-
/// Minimum Node 22.x patch version that contains the node:sqlite UTF-8 fix.
27-
/// Older 22.x lines crash with `Check failed: (location_) != nullptr` when a
28-
/// SQLite TEXT column returns bytes that V8's String::NewFromUtf8 rejects —
29-
/// commonly the case for Cursor's text blobs (truncated multi-byte chars at
30-
/// streaming boundaries) and OpenCode message text (rich tooling output).
31-
/// Track of issue: https://github.com/getagentseal/codeburn/issues/264
32-
/// Track of upstream: https://github.com/nodejs/node — fix landed in 22.x via
33-
/// later patches; stable on Node 24+.
34-
const MIN_NODE_22_PATCH = 20
35-
36-
function checkBuggyNodeVersion(): string | null {
37-
const match = /^v(\d+)\.(\d+)\.(\d+)/.exec(process.version)
38-
if (!match) return null
39-
const major = parseInt(match[1]!, 10)
40-
const minor = parseInt(match[2]!, 10)
41-
if (major === 22 && minor < MIN_NODE_22_PATCH) {
42-
return (
43-
`codeburn: Node ${process.version} ships an older node:sqlite that crashes on ` +
44-
`non-UTF-8 bytes in Cursor/OpenCode session text. Upgrade to Node 22.${MIN_NODE_22_PATCH}+ ` +
45-
`or 24+ to avoid the V8 fatal error. (https://nodejs.org)`
46-
)
47-
}
48-
return null
26+
const textDecoder = new TextDecoder('utf-8', { fatal: false })
27+
28+
/// Safely decode a BLOB column (Uint8Array) to a UTF-8 string. Node's
29+
/// node:sqlite crashes with a V8 CHECK abort when a TEXT column contains
30+
/// invalid UTF-8 (common in Cursor chat blobs with truncated multi-byte
31+
/// chars). By selecting those columns as `CAST(... AS BLOB)` in SQL, we
32+
/// get a Uint8Array here and decode it in JS where bad bytes become the
33+
/// U+FFFD replacement character instead of aborting the process.
34+
export function blobToText(value: Uint8Array | string | null | undefined): string {
35+
if (value == null) return ''
36+
if (typeof value === 'string') return value
37+
return textDecoder.decode(value)
4938
}
5039

5140
/// Lazily imports `node:sqlite`. On Node 22/23 it emits an ExperimentalWarning the first
@@ -56,15 +45,6 @@ function loadDriver(): boolean {
5645
if (loadAttempted) return DatabaseSync !== null
5746
loadAttempted = true
5847

59-
// Refuse to load on a Node version known to crash mid-query. Treating the
60-
// SQLite providers as unavailable is much friendlier than letting the user
61-
// hit a V8 CHECK abort that takes down the whole CLI.
62-
const versionWarning = checkBuggyNodeVersion()
63-
if (versionWarning !== null) {
64-
loadError = versionWarning
65-
return false
66-
}
67-
6848
const origEmit = process.emit.bind(process)
6949
let restored = false
7050
const restore = () => {

0 commit comments

Comments
 (0)