Skip to content

Commit d6842c4

Browse files
Copilot and streamich committed
perf: optimize Avro encoder for better performance
Co-authored-by: streamich <[email protected]>
1 parent 6791eb6 commit d6842c4

File tree

2 files changed

+77
-36
lines changed

2 files changed

+77
-36
lines changed

src/avro/AvroEncoder.ts

Lines changed: 59 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -90,18 +90,20 @@ export class AvroEncoder implements BinaryJsonEncoder {
9090
* Writes an Avro float value using IEEE 754 single-precision.
9191
*/
9292
public writeFloatAvro(float: number): void {
93-
this.writer.ensureCapacity(4);
94-
this.writer.view.setFloat32(this.writer.x, float, true); // little-endian
95-
this.writer.move(4);
93+
const writer = this.writer;
94+
writer.ensureCapacity(4);
95+
writer.view.setFloat32(writer.x, float, true); // little-endian
96+
writer.move(4);
9697
}
9798

9899
/**
99100
* Writes an Avro double value using IEEE 754 double-precision.
100101
*/
101102
public writeDouble(double: number): void {
102-
this.writer.ensureCapacity(8);
103-
this.writer.view.setFloat64(this.writer.x, double, true); // little-endian
104-
this.writer.move(8);
103+
const writer = this.writer;
104+
writer.ensureCapacity(8);
105+
writer.view.setFloat64(writer.x, double, true); // little-endian
106+
writer.move(8);
105107
}
106108

107109
/**
@@ -116,18 +118,42 @@ export class AvroEncoder implements BinaryJsonEncoder {
116118
* Writes an Avro string value with UTF-8 encoding and length prefix.
117119
*/
118120
public writeStr(str: string): void {
119-
const bytes = new TextEncoder().encode(str);
120-
this.writeVarIntUnsigned(bytes.length);
121-
this.writer.buf(bytes, bytes.length);
121+
const writer = this.writer;
122+
const maxSize = str.length * 4; // Max UTF-8 bytes for string
123+
writer.ensureCapacity(5 + maxSize); // 5 bytes max for varint length
124+
125+
// Reserve space for length (we'll come back to fill this)
126+
const lengthOffset = writer.x;
127+
writer.x += 5; // Max varint size
128+
129+
// Write the string and get actual byte count
130+
const bytesWritten = writer.utf8(str);
131+
const endPos = writer.x;
132+
133+
// Go back to encode the actual length
134+
writer.x = lengthOffset;
135+
this.writeVarIntUnsigned(bytesWritten);
136+
const actualLengthSize = writer.x - lengthOffset;
137+
138+
// If we reserved more space than needed, shift the string data
139+
if (actualLengthSize < 5) {
140+
const stringStart = lengthOffset + 5;
141+
const stringData = writer.uint8.slice(stringStart, endPos);
142+
writer.x = lengthOffset + actualLengthSize;
143+
writer.buf(stringData, stringData.length);
144+
} else {
145+
writer.x = endPos;
146+
}
122147
}
123148

124149
/**
125150
* Writes an Avro array with length-prefixed encoding.
126151
*/
127152
public writeArr(arr: unknown[]): void {
128153
this.writeVarIntUnsigned(arr.length);
129-
for (const item of arr) {
130-
this.writeAny(item);
154+
const length = arr.length;
155+
for (let i = 0; i < length; i++) {
156+
this.writeAny(arr[i]);
131157
}
132158
this.writeVarIntUnsigned(0); // End of array marker
133159
}
@@ -137,10 +163,12 @@ export class AvroEncoder implements BinaryJsonEncoder {
137163
*/
138164
public writeObj(obj: Record<string, unknown>): void {
139165
const entries = Object.entries(obj);
140-
this.writeVarIntUnsigned(entries.length);
141-
for (const [key, value] of entries) {
142-
this.writeStr(key);
143-
this.writeAny(value);
166+
const length = entries.length;
167+
this.writeVarIntUnsigned(length);
168+
for (let i = 0; i < length; i++) {
169+
const entry = entries[i];
170+
this.writeStr(entry[0]);
171+
this.writeAny(entry[1]);
144172
}
145173
this.writeVarIntUnsigned(0); // End of map marker
146174
}
@@ -187,16 +215,19 @@ export class AvroEncoder implements BinaryJsonEncoder {
187215
* Writes a float value using IEEE 754 single-precision.
188216
*/
189217
private writeFloatValue(float: number): void {
190-
this.writer.ensureCapacity(4);
191-
this.writer.view.setFloat32(this.writer.x, float, true); // little-endian
192-
this.writer.move(4);
218+
const writer = this.writer;
219+
writer.ensureCapacity(4);
220+
writer.view.setFloat32(writer.x, float, true); // little-endian
221+
writer.move(4);
193222
}
194223

195224
/**
196225
* Writes an ASCII string (same as regular string in Avro)
197226
*/
198227
public writeAsciiStr(str: string): void {
199-
this.writeStr(str);
228+
const writer = this.writer;
229+
this.writeVarIntUnsigned(str.length);
230+
writer.ascii(str);
200231
}
201232

202233
// Utility methods for Avro encoding
@@ -205,39 +236,42 @@ export class AvroEncoder implements BinaryJsonEncoder {
205236
* Encodes a variable-length integer (for signed values with zigzag)
206237
*/
207238
private writeVarIntSigned(value: number): void {
239+
const writer = this.writer;
208240
let n = value >>> 0; // Convert to unsigned 32-bit
209241
while (n >= 0x80) {
210-
this.writer.u8((n & 0x7f) | 0x80);
242+
writer.u8((n & 0x7f) | 0x80);
211243
n >>>= 7;
212244
}
213-
this.writer.u8(n & 0x7f);
245+
writer.u8(n & 0x7f);
214246
}
215247

216248
/**
217249
* Encodes a variable-length integer (for unsigned values like lengths)
218250
*/
219251
private writeVarIntUnsigned(value: number): void {
252+
const writer = this.writer;
220253
let n = value >>> 0; // Convert to unsigned 32-bit
221254
while (n >= 0x80) {
222-
this.writer.u8((n & 0x7f) | 0x80);
255+
writer.u8((n & 0x7f) | 0x80);
223256
n >>>= 7;
224257
}
225-
this.writer.u8(n & 0x7f);
258+
writer.u8(n & 0x7f);
226259
}
227260

228261
/**
229262
* Encodes a variable-length long using Avro's encoding
230263
*/
231264
private writeVarLong(value: bigint): void {
265+
const writer = this.writer;
232266
let n = value;
233267
const mask = BigInt(0x7f);
234268
const shift = BigInt(7);
235269

236270
while (n >= BigInt(0x80)) {
237-
this.writer.u8(Number((n & mask) | BigInt(0x80)));
271+
writer.u8(Number((n & mask) | BigInt(0x80)));
238272
n >>= shift;
239273
}
240-
this.writer.u8(Number(n & mask));
274+
writer.u8(Number(n & mask));
241275
}
242276

243277
/**

src/avro/AvroSchemaEncoder.ts

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ import type {
1010
AvroUnionSchema,
1111
AvroFixedSchema,
1212
AvroNamedSchema,
13+
AvroNullSchema,
1314
} from './types';
1415

1516
/**
@@ -58,7 +59,7 @@ export class AvroSchemaEncoder {
5859
/**
5960
* Writes a null value with schema validation.
6061
*/
61-
public writeNull(schema: AvroSchema): void {
62+
public writeNull(schema: AvroNullSchema | AvroSchema): void {
6263
this.validateSchemaType(schema, 'null');
6364
this.encoder.writeNull();
6465
}
@@ -135,7 +136,8 @@ export class AvroSchemaEncoder {
135136
throw new Error('Schema is not a record schema');
136137
}
137138

138-
for (const field of recordSchema.fields) {
139+
for (let i = 0; i < recordSchema.fields.length; i++) {
140+
const field = recordSchema.fields[i];
139141
const fieldValue = value[field.name];
140142
if (fieldValue !== undefined) {
141143
this.writeValue(fieldValue, field.type);
@@ -185,8 +187,9 @@ export class AvroSchemaEncoder {
185187
this.writeVarIntUnsigned(value.length);
186188

187189
// Write array items
188-
for (const item of value) {
189-
this.writeValue(item, arraySchema.items);
190+
const length = value.length;
191+
for (let i = 0; i < length; i++) {
192+
this.writeValue(value[i], arraySchema.items);
190193
}
191194

192195
// Write end-of-array marker
@@ -212,9 +215,11 @@ export class AvroSchemaEncoder {
212215
this.writeVarIntUnsigned(entries.length);
213216

214217
// Write map entries
215-
for (const [key, val] of entries) {
216-
this.encoder.writeStr(key);
217-
this.writeValue(val, mapSchema.values);
218+
const length = entries.length;
219+
for (let i = 0; i < length; i++) {
220+
const entry = entries[i];
221+
this.encoder.writeStr(entry[0]);
222+
this.writeValue(entry[1], mapSchema.values);
218223
}
219224

220225
// Write end-of-map marker
@@ -424,24 +429,26 @@ export class AvroSchemaEncoder {
424429
* Writes a variable-length integer using Avro's encoding (for lengths)
425430
*/
426431
private writeVarIntUnsigned(value: number): void {
432+
const writer = this.writer;
427433
let n = value >>> 0; // Convert to unsigned 32-bit
428434
while (n >= 0x80) {
429-
this.writer.u8((n & 0x7f) | 0x80);
435+
writer.u8((n & 0x7f) | 0x80);
430436
n >>>= 7;
431437
}
432-
this.writer.u8(n & 0x7f);
438+
writer.u8(n & 0x7f);
433439
}
434440

435441
/**
436442
* Writes a variable-length integer using Avro's encoding (for signed values with zigzag)
437443
*/
438444
private writeVarIntSigned(value: number): void {
445+
const writer = this.writer;
439446
let n = value >>> 0; // Convert to unsigned 32-bit
440447
while (n >= 0x80) {
441-
this.writer.u8((n & 0x7f) | 0x80);
448+
writer.u8((n & 0x7f) | 0x80);
442449
n >>>= 7;
443450
}
444-
this.writer.u8(n & 0x7f);
451+
writer.u8(n & 0x7f);
445452
}
446453

447454
/**

0 commit comments

Comments
 (0)