Skip to content

Commit 2982051

Browse files
authored
Merge pull request #11 from jsonjoy-com/partial-json
Partial JSON parser
2 parents 0db8bf8 + 44ed728 commit 2982051

File tree

5 files changed

+348
-35
lines changed

5 files changed

+348
-35
lines changed

src/json/JsonDecoder.ts

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ const isUndefined = (u8: Uint8Array, x: number) =>
107107

108108
const fromCharCode = String.fromCharCode;
109109

110-
const readShortUtf8StrAndUnescape = (reader: Reader): string => {
110+
export const readKey = (reader: Reader): string => {
111111
const buf = reader.uint8;
112112
const len = buf.length;
113113
const points: number[] = [];
@@ -202,10 +202,8 @@ export class JsonDecoder implements BinaryJsonDecoder {
202202
const uint8 = reader.uint8;
203203
const char = uint8[x];
204204
switch (char) {
205-
case 34: {
206-
// "
207-
if (uint8[x + 1] === 0x64) {
208-
// d
205+
case 34 /* " */: {
206+
if (uint8[x + 1] === 0x64 /* d */) {
209207
const bin = this.tryReadBin();
210208
if (bin) return bin;
211209
if (isUndefined(uint8, x + 2)) {
@@ -215,18 +213,18 @@ export class JsonDecoder implements BinaryJsonDecoder {
215213
}
216214
return this.readStr();
217215
}
218-
case 91: // [
216+
case 91 /* [ */:
219217
return this.readArr();
220-
case 102: // f
218+
case 102 /* f */:
221219
return this.readFalse();
222-
case 110: // n
220+
case 110 /* n */:
223221
return this.readNull();
224-
case 116: // t
222+
case 116 /* t */:
225223
return this.readTrue();
226-
case 123: // {
224+
case 123 /* { */:
227225
return this.readObj();
228226
default:
229-
if ((char >= 48 && char <= 57) || char === 45) return this.readNum();
227+
if ((char >= 48 /* 0 */ && char <= 57) /* 9 */ || char === 45 /* - */) return this.readNum();
230228
throw new Error('Invalid JSON');
231229
}
232230
}
@@ -239,10 +237,10 @@ export class JsonDecoder implements BinaryJsonDecoder {
239237
while (true) {
240238
char = uint8[x];
241239
switch (char) {
242-
case 32: // space
243-
case 9: // tab
244-
case 10: // line feed
245-
case 13: // carriage return
240+
case 32 /* <space> */:
241+
case 9 /* <tab> */:
242+
case 10 /* <line feed> */:
243+
case 13 /* <carriage return> */:
246244
x++;
247245
continue;
248246
default:
@@ -253,27 +251,27 @@ export class JsonDecoder implements BinaryJsonDecoder {
253251
}
254252

255253
public readNull(): null {
256-
if (this.reader.u32() !== 0x6e756c6c) throw new Error('Invalid JSON');
254+
if (this.reader.u32() !== 0x6e756c6c /* null */) throw new Error('Invalid JSON');
257255
return null;
258256
}
259257

260258
public readTrue(): true {
261-
if (this.reader.u32() !== 0x74727565) throw new Error('Invalid JSON');
259+
if (this.reader.u32() !== 0x74727565 /* true */) throw new Error('Invalid JSON');
262260
return true;
263261
}
264262

265263
public readFalse(): false {
266264
const reader = this.reader;
267-
if (reader.u8() !== 0x66 || reader.u32() !== 0x616c7365) throw new Error('Invalid JSON');
265+
if (reader.u8() !== 0x66 /* f */ || reader.u32() !== 0x616c7365 /* alse */) throw new Error('Invalid JSON');
268266
return false;
269267
}
270268

271269
public readBool(): unknown {
272270
const reader = this.reader;
273271
switch (reader.uint8[reader.x]) {
274-
case 102: // f
272+
case 102 /* f */:
275273
return this.readFalse();
276-
case 116: // t
274+
case 116 /* t */:
277275
return this.readTrue();
278276
default:
279277
throw new Error('Invalid JSON');
@@ -642,42 +640,44 @@ export class JsonDecoder implements BinaryJsonDecoder {
642640

643641
public readArr(): unknown[] {
644642
const reader = this.reader;
645-
if (reader.u8() !== 0x5b) throw new Error('Invalid JSON');
643+
if (reader.u8() !== 0x5b /* [ */) throw new Error('Invalid JSON');
646644
const arr: unknown[] = [];
647645
const uint8 = reader.uint8;
646+
let first = true;
648647
while (true) {
649648
this.skipWhitespace();
650649
const char = uint8[reader.x];
651-
if (char === 0x5d) return reader.x++, arr; // ]
652-
if (char === 0x2c) {
653-
reader.x++;
654-
continue;
655-
} // ,
650+
if (char === 0x5d /* ] */) return reader.x++, arr;
651+
if (char === 0x2c /* , */) reader.x++;
652+
else if (!first) throw new Error('Invalid JSON');
653+
this.skipWhitespace();
656654
arr.push(this.readAny());
655+
first = false;
657656
}
658657
}
659658

660659
public readObj(): PackValue | Record<string, unknown> | unknown {
661660
const reader = this.reader;
662-
if (reader.u8() !== 0x7b) throw new Error('Invalid JSON');
661+
if (reader.u8() !== 0x7b /* { */) throw new Error('Invalid JSON');
663662
const obj: Record<string, unknown> = {};
664663
const uint8 = reader.uint8;
664+
let first = true;
665665
while (true) {
666666
this.skipWhitespace();
667667
let char = uint8[reader.x];
668-
if (char === 0x7d) return reader.x++, obj; // }
669-
if (char === 0x2c) {
670-
reader.x++;
671-
continue;
672-
} // ,
668+
if (char === 0x7d /* } */) return reader.x++, obj;
669+
if (char === 0x2c /* , */) reader.x++;
670+
else if (!first) throw new Error('Invalid JSON');
671+
this.skipWhitespace();
673672
char = uint8[reader.x++];
674-
if (char !== 0x22) throw new Error('Invalid JSON');
675-
const key = readShortUtf8StrAndUnescape(reader);
673+
if (char !== 0x22 /* " */) throw new Error('Invalid JSON');
674+
const key = readKey(reader);
676675
if (key === '__proto__') throw new Error('Invalid JSON');
677676
this.skipWhitespace();
678-
if (reader.u8() !== 0x3a) throw new Error('Invalid JSON');
677+
if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON');
679678
this.skipWhitespace();
680679
obj[key] = this.readAny();
680+
first = false;
681681
}
682682
}
683683
}

src/json/JsonDecoderPartial.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import {JsonDecoder, readKey} from './JsonDecoder';
2+
import type {PackValue} from '../types';
3+
4+
/**
 * Control-flow error used by the partial decoder: it is thrown when decoding
 * of a container has to stop early and carries the value that was decoded up
 * to that point.
 */
export class DecodeFinishError extends Error {
  /**
   * @param value The (possibly incomplete) value decoded before decoding
   *     had to stop.
   */
  constructor(public readonly value: unknown) {
    super('DECODE_FINISH');
    // Restore the prototype chain. When a subclass of `Error` is compiled to
    // ES5, the instance returned by `super()` is a plain `Error`, which would
    // make `instanceof DecodeFinishError` checks fail.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
9+
10+
/**
 * This class parses JSON which is mostly correct but not necessarily complete
 * or with missing parts. It can be used to parse JSON that is being streamed
 * in chunks or JSON output of an LLM model.
 *
 * If the end of a nested JSON value (array, object) is missing, this parser
 * will return the initial correct part for that value, which it was able to
 * parse, until the point where the JSON is no longer valid.
 *
 * Examples:
 *
 * ```js
 * const decoder = new JsonDecoderPartial();
 * const parse = (json) => {
 *   decoder.reader.reset(Buffer.from(json, 'utf-8'));
 *   return decoder.readAny();
 * };
 *
 * // Missing closing bracket
 * parse('[1, 2, 3'); // [1, 2, 3]
 *
 * // Trailing comma and missing closing bracket
 * parse('[1, 2, '); // [1, 2]
 *
 * // Incomplete second entry and missing closing brace
 * parse('{"foo": 1, "bar":'); // {"foo": 1}
 * ```
 */
export class JsonDecoderPartial extends JsonDecoder {
  /**
   * Reads the next value using the base decoder. If a nested container
   * aborts with a {@link DecodeFinishError}, the partial value carried by
   * that error is returned instead of propagating the error.
   *
   * @returns The decoded (possibly partial) value.
   * @throws Any error other than {@link DecodeFinishError} is re-thrown.
   */
  public readAny(): unknown {
    try {
      return super.readAny();
    } catch (error) {
      if (error instanceof DecodeFinishError) return error.value;
      throw error;
    }
  }

  /**
   * Reads an array, tolerating a missing tail.
   *
   * @returns The elements decoded so far when the closing bracket is reached
   *     or when a separator is missing between two elements.
   * @throws {DecodeFinishError} Carrying the elements decoded so far, when an
   *     element fails to decode with an `Invalid JSON` error.
   * @throws {Error} `Invalid JSON` if the input does not start with `[`.
   */
  public readArr(): unknown[] {
    const reader = this.reader;
    if (reader.u8() !== 0x5b /* [ */) throw new Error('Invalid JSON');
    const arr: unknown[] = [];
    const uint8 = reader.uint8;
    let first = true;
    while (true) {
      this.skipWhitespace();
      const char = uint8[reader.x];
      if (char === 0x5d /* ] */) return reader.x++, arr;
      if (char === 0x2c /* , */) reader.x++;
      // No separator after a decoded element: stop and keep what we have.
      else if (!first) return arr;
      this.skipWhitespace();
      try {
        arr.push(this.readAny());
      } catch (error) {
        if (error instanceof DecodeFinishError) return arr.push(error.value), arr;
        if (error instanceof Error && error.message === 'Invalid JSON') throw new DecodeFinishError(arr);
        throw error;
      }
      first = false;
    }
  }

  /**
   * Reads an object, tolerating a missing tail.
   *
   * Leading, repeated and trailing commas are skipped. A missing comma
   * between two entries ends decoding of the object, mirroring how
   * {@link JsonDecoderPartial.readArr} treats a missing separator.
   *
   * @returns The entries decoded so far when the closing brace is reached or
   *     when a separator is missing between two entries.
   * @throws {DecodeFinishError} Carrying the entries decoded so far, when a
   *     key, the `:` separator or a value fails with an `Invalid JSON` error.
   * @throws {Error} `Invalid JSON` if the input does not start with `{`.
   */
  public readObj(): PackValue | Record<string, unknown> | unknown {
    const reader = this.reader;
    if (reader.u8() !== 0x7b /* { */) throw new Error('Invalid JSON');
    const obj: Record<string, unknown> = {};
    const uint8 = reader.uint8;
    // True right after an entry has been decoded, until a comma is consumed.
    let expectSeparator = false;
    while (true) {
      this.skipWhitespace();
      let char = uint8[reader.x];
      if (char === 0x7d /* } */) return reader.x++, obj;
      if (char === 0x2c /* , */) {
        reader.x++;
        expectSeparator = false;
        continue;
      }
      // No separator after a decoded entry: stop and keep what we have.
      if (expectSeparator) return obj;
      try {
        char = uint8[reader.x++];
        if (char !== 0x22 /* " */) throw new Error('Invalid JSON');
        const key = readKey(reader);
        // Reject `__proto__` keys so that decoded input cannot replace the
        // prototype of the result object.
        if (key === '__proto__') throw new Error('Invalid JSON');
        this.skipWhitespace();
        if (reader.u8() !== 0x3a /* : */) throw new Error('Invalid JSON');
        this.skipWhitespace();
        try {
          obj[key] = this.readAny();
        } catch (error) {
          if (error instanceof DecodeFinishError) {
            obj[key] = error.value;
            return obj;
          }
          throw error;
        }
      } catch (error) {
        if (error instanceof DecodeFinishError) return obj;
        if (error instanceof Error && error.message === 'Invalid JSON') throw new DecodeFinishError(obj);
        throw error;
      }
      expectSeparator = true;
    }
  }
}

src/json/__tests__/JsonDecoder.spec.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,19 @@ describe('array', () => {
322322
expect(value).toEqual([1, 2.2, -3.3]);
323323
});
324324

325+
test('simple array', () => {
326+
const data = Buffer.from('[1, 2, 3]', 'utf-8');
327+
decoder.reader.reset(data);
328+
const value = decoder.readAny();
329+
expect(value).toEqual([1, 2, 3]);
330+
});
331+
332+
test('missing comma', () => {
333+
const data = Buffer.from('[1, 2 3]', 'utf-8');
334+
decoder.reader.reset(data);
335+
expect(() => decoder.readAny()).toThrow(new Error('Invalid JSON'));
336+
});
337+
325338
test('nested arrays', () => {
326339
const data = Buffer.from(' \n \r \t [[],\n[ 4,\t5] , [null]] \n \r \t ', 'utf-8');
327340
decoder.reader.reset(data);
@@ -366,6 +379,19 @@ describe('object', () => {
366379
expect(value).toEqual({foo: 'bar'});
367380
});
368381

382+
test('simple object', () => {
383+
const data = Buffer.from('{"foo": 1, "bar": 2}', 'utf-8');
384+
decoder.reader.reset(data);
385+
const value = decoder.readAny();
386+
expect(value).toEqual({foo: 1, bar: 2});
387+
});
388+
389+
test('missing comma', () => {
390+
const data = Buffer.from('{"foo": 1 "bar": 2}', 'utf-8');
391+
decoder.reader.reset(data);
392+
expect(() => decoder.readAny()).toThrow(new Error('Invalid JSON'));
393+
});
394+
369395
test('nested object', () => {
370396
const data = Buffer.from('{"":{}}', 'utf-8');
371397
decoder.reader.reset(data);
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import {Writer} from '@jsonjoy.com/util/lib/buffers/Writer';
2+
import {JsonValue} from '../../types';
3+
import {JsonEncoder} from '../JsonEncoder';
4+
import {JsonEncoderStable} from '../JsonEncoderStable';
5+
import {JsonDecoderPartial} from '../JsonDecoderPartial';
6+
import {documents} from '../../__tests__/json-documents';
7+
import {binaryDocuments} from '../../__tests__/binary-documents';
8+
9+
// Both encoders write into the same small writer; the partial decoder must
// read back complete documents exactly like the regular decoder would.
const writer = new Writer(8);
const encoder = new JsonEncoder(writer);
const encoderStable = new JsonEncoderStable(writer);
const decoder = new JsonDecoderPartial();

/**
 * Encodes `value` with the regular and the stable JSON encoder, decodes both
 * outputs with the partial decoder and expects each result to equal `value`.
 */
const assertEncoder = (value: JsonValue) => {
  // Encode with both encoders first, then decode, then assert.
  const payloads = [encoder.encode(value), encoderStable.encode(value)];
  const results = payloads.map((payload) => decoder.decode(payload));
  for (const result of results) {
    expect(result).toEqual(value);
  }
};
24+
25+
// Each suite round-trips every sample document through `assertEncoder`.
// A sample flagged with `only` is registered through `test.only`.
for (const [title, samples] of [
  ['Sample JSON documents', documents],
  ['Sample binary documents', binaryDocuments],
] as const) {
  describe(title, () => {
    for (const sample of samples) {
      const register = sample.only ? test.only : test;
      register(sample.name, () => {
        assertEncoder(sample.json as any);
      });
    }
  });
}

0 commit comments

Comments
 (0)