Skip to content

Commit ab4b881

Browse files
authored
feat: improve Ion codec, add IonDecoder
Implement Amazon Ion Binary Decoder with Comprehensive Test Coverage
2 parents 1595212 + dc4a31f commit ab4b881

File tree

9 files changed

+1147
-444
lines changed

9 files changed

+1147
-444
lines changed

src/ion/IonDecoder.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import {Reader} from '@jsonjoy.com/util/lib/buffers/Reader';
2+
import {IonDecoderBase} from './IonDecoderBase';
3+
import {Import} from './Import';
4+
import {systemSymbolImport} from './symbols';
5+
import type {IReader, IReaderResettable} from '@jsonjoy.com/util/lib/buffers';
6+
7+
/**
 * Decoder for a complete Amazon Ion binary document held in a `Uint8Array`.
 *
 * All per-type parsing is inherited from {@link IonDecoderBase}; this class
 * only adds the document-level sequence (reset, version marker, symbol table,
 * value). The constructor is inherited unchanged: it accepts an optional
 * reader and forwards it to the base class.
 */
export class IonDecoder<R extends IReader & IReaderResettable = IReader & IReaderResettable> extends IonDecoderBase<R> {
  /**
   * Decodes one Ion binary document.
   *
   * @param data Bytes of the document, starting with the Binary Version Marker.
   * @returns The first value that follows the marker and the optional symbol
   *     table.
   * @throws Error if the Binary Version Marker is wrong or the payload cannot
   *     be parsed by the base class readers.
   */
  public decode(data: Uint8Array): unknown {
    this.reader.reset(data);

    // Start every document from the system symbols only, so that symbols
    // declared by a previously decoded document never leak into this one.
    this.symbols = new Import(systemSymbolImport, []);

    // Header: version marker first, then an optional symbol table.
    this.validateBVM();
    this.readSymbolTable();

    // Body: the document value itself.
    const value = this.val();
    return value;
  }

  /**
   * Reads the next value at the reader's current position. No reset, version
   * marker check or symbol table handling is performed.
   *
   * @returns The decoded value.
   */
  public read(): unknown {
    const value = this.val();
    return value;
  }
}

src/ion/IonDecoderBase.ts

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
import {Reader} from '@jsonjoy.com/util/lib/buffers/Reader';
2+
import sharedCachedUtf8Decoder from '@jsonjoy.com/util/lib/buffers/utf8/sharedCachedUtf8Decoder';
3+
import type {CachedUtf8Decoder} from '@jsonjoy.com/util/lib/buffers/utf8/CachedUtf8Decoder';
4+
import type {IReader, IReaderResettable} from '@jsonjoy.com/util/lib/buffers';
5+
import {TYPE} from './constants';
6+
import {Import} from './Import';
7+
8+
/**
 * Low-level reader for the Amazon Ion 1.0 binary encoding.
 *
 * Every value starts with a type descriptor octet: the high nibble is the type
 * code and the low nibble `L` is the length code. `L = 15` encodes a typed
 * null and `L = 14` means the real length follows as a VarUInt. Types that are
 * not listed in {@link valOrNop} are rejected with "Unknown Ion type".
 * Annotations are parsed but discarded.
 */
export class IonDecoderBase<R extends IReader & IReaderResettable = IReader & IReaderResettable> {
  /** Sentinel returned by {@link valOrNop} when NOP padding was consumed instead of a value. */
  protected static readonly NOP: unique symbol = Symbol('ion.nop');

  /** Source of bytes; `reader.x` is the current read position. */
  public readonly reader: R;
  public readonly utf8Decoder: CachedUtf8Decoder;
  /** Symbol table used to resolve struct field names. */
  protected symbols?: Import;

  /**
   * @param reader Reader to decode from; a fresh `Reader` is created when omitted.
   */
  constructor(reader?: R) {
    this.reader = (reader ?? new Reader()) as R;
    this.utf8Decoder = sharedCachedUtf8Decoder;
  }

  /**
   * Reads the next value, silently skipping any NOP padding in front of it.
   *
   * @returns The decoded value.
   * @throws Error on an unknown type code, malformed data, or when the data
   *     ends before a value is found.
   */
  public val(): unknown {
    let value = this.valOrNop();
    while (value === IonDecoderBase.NOP) value = this.valOrNop();
    return value;
  }

  /**
   * Reads one encoded item, which is either a value or a run of NOP padding.
   * Containers call this directly because padding may be the last item inside
   * them, in which case there is no "next value" to fall through to.
   *
   * @returns The decoded value, or {@link IonDecoderBase.NOP} if padding was skipped.
   */
  protected valOrNop(): unknown {
    const typedesc = this.nextByte();
    const type = (typedesc >> 4) & 0xf;
    const length = typedesc & 0xf;

    switch (type) {
      case TYPE.NULL:
        // Type 0 is `null` only for L = 15; every other L is NOP padding,
        // which readNull() skips.
        this.readNull(length);
        return length === 15 ? null : IonDecoderBase.NOP;
      case TYPE.BOOL:
        return this.readBool(length);
      case TYPE.UINT:
        return this.readUint(length);
      case TYPE.NINT:
        return this.readNint(length);
      case TYPE.FLOT:
        return this.readFloat(length);
      case TYPE.STRI:
        return this.readString(length);
      case TYPE.BINA:
        return this.readBinary(length);
      case TYPE.LIST:
        return this.readList(length);
      case TYPE.STRU:
        return this.readStruct(length);
      case TYPE.ANNO:
        return this.readAnnotation(length);
      default:
        throw new Error(`Unknown Ion type: 0x${type.toString(16)}`);
    }
  }

  /**
   * Handles type code 0: `null` (L = 15) or NOP padding (any other L).
   * For padding the pad bytes are skipped and nothing else is consumed; the
   * caller tells the two cases apart by `length`.
   *
   * @param length Length code from the type descriptor.
   * @returns Always `null`.
   */
  protected readNull(length: number): null {
    if (length === 15) return null;
    // L = 14: pad size follows as VarUInt. Otherwise L itself is the number of
    // pad bytes after the descriptor (L = 0 is a one-octet pad).
    const padLength = length === 14 ? this.readVUint() : length;
    this.reader.x += padLength;
    return null;
  }

  /**
   * @param length Length code: 0 = false, 1 = true, 15 = null.
   * @throws Error for any other length code.
   */
  protected readBool(length: number): boolean | null {
    if (length === 15) return null;
    if (length === 0) return false;
    if (length === 1) return true;
    throw new Error(`Invalid bool length: ${length}`);
  }

  /**
   * Reads a non-negative integer as a JavaScript number (magnitudes above
   * 2^53 lose precision).
   *
   * @param length Length code from the type descriptor.
   * @returns The integer, or `null` for L = 15.
   */
  protected readUint(length: number): number | null {
    if (length === 15) return null;
    return this.readMagnitude(length);
  }

  /**
   * Reads a negative integer as a JavaScript number.
   *
   * @param length Length code from the type descriptor.
   * @returns The negated magnitude, or `null` for L = 15.
   * @throws Error when L = 0, which would encode negative zero.
   */
  protected readNint(length: number): number | null {
    if (length === 15) return null;
    if (length === 0) throw new Error('Negative zero is illegal');
    return -this.readMagnitude(length);
  }

  /**
   * @param length Length code: 0 = 0.0, 4 = 32-bit, 8 = 64-bit, 15 = null.
   * @throws Error for any other length code.
   */
  protected readFloat(length: number): number | null {
    if (length === 15) return null;
    if (length === 0) return 0.0;
    if (length === 4) return this.reader.f32();
    if (length === 8) return this.reader.f64();
    throw new Error(`Unsupported float length: ${length}`);
  }

  /**
   * @param length Length code from the type descriptor.
   * @returns The UTF-8 decoded text, or `null` for L = 15.
   */
  protected readString(length: number): string | null {
    if (length === 15) return null;
    const actualLength = length === 14 ? this.readVUint() : length;
    if (actualLength === 0) return '';
    return this.reader.utf8(actualLength);
  }

  /**
   * @param length Length code from the type descriptor.
   * @returns The raw bytes as returned by `reader.buf()`, or `null` for L = 15.
   */
  protected readBinary(length: number): Uint8Array | null {
    if (length === 15) return null;
    const actualLength = length === 14 ? this.readVUint() : length;
    if (actualLength === 0) return new Uint8Array(0);
    return this.reader.buf(actualLength);
  }

  /**
   * Reads a list; NOP padding between or after elements is skipped.
   *
   * @param length Length code from the type descriptor.
   * @returns The elements in encoded order, or `null` for L = 15.
   * @throws Error if the elements do not end exactly at the declared length.
   */
  protected readList(length: number): unknown[] | null {
    if (length === 15) return null;
    const actualLength = length === 14 ? this.readVUint() : length;
    if (actualLength === 0) return [];

    const reader = this.reader;
    const endPos = reader.x + actualLength;
    const list: unknown[] = [];

    while (reader.x < endPos) {
      const item = this.valOrNop();
      if (item !== IonDecoderBase.NOP) list.push(item);
    }

    if (reader.x !== endPos) {
      throw new Error('List parsing error: incorrect length');
    }

    return list;
  }

  /**
   * Reads a struct into a plain object keyed by field name. Fields whose
   * value is NOP padding are dropped together with their field name.
   *
   * @param length Length code from the type descriptor.
   * @returns The decoded object, or `null` for L = 15.
   * @throws Error on an unknown field symbol or if the fields do not end
   *     exactly at the declared length.
   */
  protected readStruct(length: number): Record<string, unknown> | null {
    if (length === 15) return null;

    // For structs L = 1 does not mean "one byte": it marks a struct with
    // sorted field names whose length follows as VarUInt, just like L = 14.
    const actualLength = length === 14 || length === 1 ? this.readVUint() : length;
    if (actualLength === 0) return {};

    const reader = this.reader;
    const endPos = reader.x + actualLength;
    const struct: Record<string, unknown> = {};

    while (reader.x < endPos) {
      const fieldNameId = this.readVUint();
      const fieldValue = this.valOrNop();
      // Padding carries a field name too, but it is not a real field and its
      // symbol ID need not resolve, so skip before looking the name up.
      if (fieldValue === IonDecoderBase.NOP) continue;
      const fieldName = this.getSymbolText(fieldNameId);
      if (fieldName === '__proto__') {
        // Plain assignment would replace the object's prototype with data
        // taken from the input; store it as an ordinary own property instead.
        Object.defineProperty(struct, fieldName, {
          value: fieldValue,
          enumerable: true,
          configurable: true,
          writable: true,
        });
      } else {
        struct[fieldName] = fieldValue;
      }
    }

    if (reader.x !== endPos) {
      throw new Error('Struct parsing error: incorrect length');
    }

    return struct;
  }

  /**
   * Reads an annotation wrapper and returns the wrapped value; the
   * annotations themselves are discarded.
   *
   * @param length Length code from the type descriptor.
   * @throws Error if the wrapper is shorter than 3 bytes or the annotation
   *     list does not end at its declared length.
   */
  protected readAnnotation(length: number): unknown {
    this.readAnnotationIds(length);
    return this.val();
  }

  /**
   * Reads the header of an annotation wrapper, leaving the reader positioned
   * at the wrapped value.
   *
   * @param length Length code from the wrapper's type descriptor.
   * @returns Annotation symbol IDs in encoded order.
   */
  protected readAnnotationIds(length: number): number[] {
    if (length < 3) {
      throw new Error('Annotation wrapper must have at least 3 bytes');
    }

    // The extended wrapper length must be consumed to advance the reader, but
    // its value is not needed: the wrapped value carries its own length.
    if (length === 14) this.readVUint();

    const annotLength = this.readVUint();
    const reader = this.reader;
    const endAnnotPos = reader.x + annotLength;
    const ids: number[] = [];

    while (reader.x < endAnnotPos) ids.push(this.readVUint());

    if (reader.x !== endAnnotPos) {
      throw new Error('Annotation parsing error: incorrect annotation length');
    }

    return ids;
  }

  /**
   * Reads a VarUInt: 7 payload bits per octet, most significant group first,
   * with the high bit set on the final octet.
   *
   * @throws Error if the data ends before the final octet.
   */
  protected readVUint(): number {
    let value = 0;
    let byte: number;

    do {
      byte = this.nextByte();
      // Multiply instead of shifting: `<<` works on 32-bit integers and would
      // wrap for values of 2^31 and above.
      value = value * 128 + (byte & 0x7f);
    } while ((byte & 0x80) === 0);

    return value;
  }

  /**
   * Reads a VarInt: like VarUInt, except that bit 0x40 of the first octet is
   * the sign and only its low 6 bits belong to the magnitude.
   *
   * @throws Error if the data ends before the final octet.
   */
  protected readVInt(): number {
    let byte = this.nextByte();
    const sign = byte & 0x40 ? -1 : 1;
    let magnitude = byte & 0x3f;

    while ((byte & 0x80) === 0) {
      byte = this.nextByte();
      magnitude = magnitude * 128 + (byte & 0x7f);
    }

    return sign * magnitude;
  }

  /**
   * Resolves a symbol ID through the current symbol table.
   *
   * @throws Error if no table is installed or the ID is not defined in it.
   */
  protected getSymbolText(symbolId: number): string {
    if (!this.symbols) {
      throw new Error('No symbol table available');
    }

    const symbol = this.symbols.getText(symbolId);
    if (symbol === undefined) {
      throw new Error(`Unknown symbol ID: ${symbolId}`);
    }

    return symbol;
  }

  /**
   * Consumes the 4-octet Binary Version Marker and checks it is Ion 1.0
   * (`E0 01 00 EA`).
   *
   * @throws Error on any other marker.
   */
  protected validateBVM(): void {
    const bvm = this.reader.u32();
    if (bvm !== 0xe00100ea) {
      throw new Error(`Invalid Ion Binary Version Marker: 0x${bvm.toString(16)}`);
    }
  }

  /**
   * Consumes a local symbol table if one is next in the stream and appends
   * its `symbols` list to the current table. Any other value, including one
   * annotated with something else, is left unread for the caller.
   */
  protected readSymbolTable(): void {
    const reader = this.reader;
    if (reader.x >= reader.uint8.length) return;

    const start = reader.x;
    const typedesc = reader.peak();
    if (((typedesc >> 4) & 0xf) !== TYPE.ANNO) return;

    reader.x = start + 1; // Step over the descriptor that was just peeked.
    const annotations = this.readAnnotationIds(typedesc & 0xf);

    // A local symbol table is a struct whose FIRST annotation is
    // `$ion_symbol_table`, which has the fixed system symbol ID 3 in Ion 1.0.
    if (annotations[0] !== 3) {
      // Ordinary annotated value: rewind so that it is decoded as data.
      reader.x = start;
      return;
    }

    const table = this.val();
    if (table && typeof table === 'object' && !Array.isArray(table)) {
      // 'symbols' is the text of system symbol ID 7.
      const newSymbols = (table as Record<string, unknown>).symbols;
      if (Array.isArray(newSymbols)) {
        this.symbols = new Import(this.symbols ?? null, newSymbols as string[]);
      }
    }
  }

  /** Reads one octet, failing cleanly instead of running past the end of the data. */
  private nextByte(): number {
    const reader = this.reader;
    if (reader.x >= reader.uint8.length) {
      throw new Error('Unexpected end of Ion data');
    }
    return reader.u8();
  }

  /** Reads the big-endian magnitude shared by the positive and negative int types. */
  private readMagnitude(length: number): number {
    // L = 14: the byte count follows as VarUInt (integers wider than 13 bytes).
    const size = length === 14 ? this.readVUint() : length;
    let value = 0;
    for (let i = 0; i < size; i++) {
      value = value * 256 + this.nextByte();
    }
    return value;
  }
}

src/ion/README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,44 @@
1+
# Amazon Ion Binary Codec
2+
3+
This library provides high-performance Amazon Ion binary format encoding and decoding capabilities.
4+
5+
## Usage
6+
7+
```typescript
8+
import {IonEncoderFast, IonDecoder} from '@jsonjoy.com/json-pack/lib/ion';
9+
10+
const encoder = new IonEncoderFast();
11+
const decoder = new IonDecoder();
12+
13+
const data = {users: [{name: 'Alice', age: 30}], count: 1};
14+
const encoded = encoder.encode(data);
15+
const decoded = decoder.decode(encoded);
16+
```
17+
18+
## Important Usage Notes
19+
20+
⚠️ **Instance Reuse Limitation**: Due to internal state management with shared UTF-8 decoders, encoder and decoder instances should **not be reused** across multiple encode/decode operations with complex data. For reliable operation, create fresh instances for each encoding/decoding operation:
21+
22+
```typescript
23+
// ❌ DON'T: Reuse instances for multiple operations
24+
const encoder = new IonEncoderFast();
25+
const decoder = new IonDecoder();
26+
for (const item of items) {
27+
const encoded = encoder.encode(item); // May cause state corruption
28+
const decoded = decoder.decode(encoded);
29+
}
30+
31+
// ✅ DO: Create fresh instances for each operation
32+
for (const item of items) {
33+
const encoder = new IonEncoderFast();
34+
const decoder = new IonDecoder();
35+
const encoded = encoder.encode(item);
36+
const decoded = decoder.decode(encoded);
37+
}
38+
```
39+
40+
This limitation primarily affects complex nested objects with many string keys. Simple data structures may work with reused instances, but fresh instances are recommended for guaranteed correctness.
41+
142
## Benchmarks
243

344
Encoding:

0 commit comments

Comments
 (0)