Skip to content

Commit ad1a02b

Browse files
author
xiaochen.gaoxc
committed
refactor: enhance put/get string performance
1 parent 2e3d935 commit ad1a02b

File tree

5 files changed

+219
-1
lines changed

5 files changed

+219
-1
lines changed

benchmark/getRawString.js

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
'use strict';
2+
3+
const Benchmark = require('benchmark');
4+
const benchmarks = require('beautify-benchmark');
5+
const assert = require('assert');
6+
7+
const ByteBuffer = require('..');
8+
const io = ByteBuffer.allocate(1024 * 1024);
9+
10+
const str = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234';
11+
io.putRawString(str);
12+
const buf = io.array();
13+
14+
// io.position(0);
15+
// console.log(io.getRawStringByStringLength(1024));
16+
// console.log(buf.toString());
17+
18+
io.position(0);
19+
assert(io.getRawStringByStringLength(1024) === buf.toString());
20+
21+
/**
 * Decode a byte sequence made of 1-, 2- and 3-byte UTF-8 forms into a string.
 *
 * Each decoded value becomes one UTF-16 code unit, so surrogate halves that
 * were encoded separately as 3-byte sequences round-trip unchanged. 4-byte
 * UTF-8 lead bytes and stray continuation bytes are rejected.
 *
 * @param {Buffer|Uint8Array} buf - bytes to decode
 * @returns {string} the decoded string
 * @throws {Error} when a lead byte is not a 1/2/3-byte form, or when a
 *   multi-byte sequence is cut off by the end of `buf`
 */
function getUTF(buf) {
  const INVALID = 'string is not valid UTF-8 encode';
  const length = buf.length;
  const units = [];

  for (let i = 0; i < length; i++) {
    const lead = buf[i];
    if (lead < 0x80) {
      units.push(lead);
    } else if ((lead & 0xe0) === 0xc0) {
      // Two-byte form: one continuation byte must follow.
      if (i + 1 >= length) {
        throw new Error(INVALID);
      }
      const next = buf[++i];
      units.push(((lead & 0x1f) << 6) + (next & 0x3f));
    } else if ((lead & 0xf0) === 0xe0) {
      // Three-byte form: two continuation bytes must follow.
      if (i + 2 >= length) {
        throw new Error(INVALID);
      }
      const mid = buf[++i];
      const last = buf[++i];
      units.push(((lead & 0x0f) << 12) + ((mid & 0x3f) << 6) + (last & 0x3f));
    } else {
      throw new Error(INVALID);
    }
  }

  // fromCharCode.apply passes every code unit as a separate argument; convert
  // in bounded chunks so large inputs cannot exhaust the call stack.
  const CHUNK = 0x2000;
  if (units.length <= CHUNK) {
    return String.fromCharCode.apply(String, units);
  }
  let text = '';
  for (let from = 0; from < units.length; from += CHUNK) {
    text += String.fromCharCode.apply(String, units.slice(from, from + CHUNK));
  }
  return text;
}
43+
44+
assert(getUTF(buf) === buf.toString());
45+
46+
47+
/**
 * ASCII-only fast decoder: verify that no byte in `buf` has its high bit set,
 * then let the native decoder produce the string.
 *
 * Whole 32-bit words are tested four bytes at a time with the 0x80808080
 * mask; the remaining `length % 4` bytes are tested individually so that a
 * non-ASCII byte at the tail is rejected the same way as one inside a word.
 *
 * @param {Buffer} buf - bytes to decode
 * @returns {string} the decoded string
 * @throws {Error} when any byte is >= 0x80
 */
function getUTF2(buf) {
  const NON_ASCII = 'buffer contains non-ASCII bytes';
  const length = buf.length;
  const wordBytes = (length >> 2) * 4;

  let offset = 0;
  for (; offset < wordBytes; offset += 4) {
    if ((buf.readInt32BE(offset) & 0x80808080) !== 0) {
      throw new Error(NON_ASCII);
    }
  }
  // Tail bytes that do not fill a whole word.
  for (; offset < length; offset++) {
    if ((buf[offset] & 0x80) !== 0) {
      throw new Error(NON_ASCII);
    }
  }
  return buf.toString('utf8', 0, length);
}
61+
62+
assert(getUTF2(buf) === buf.toString());
63+
io.position(0);
64+
assert(io.getRawStringFast(1024) === buf.toString());
65+
66+
io.position(0);
67+
assert(io.getUTFString(1024) === buf.toString());
68+
69+
const suite = new Benchmark.Suite();
70+
suite
71+
.add('io.getRawStringByStringLength', function() {
72+
io._offset = 0;
73+
io.getRawStringByStringLength(1024);
74+
})
75+
.add('io.getRawStringFast', function() {
76+
io._offset = 0;
77+
io.getRawStringFast(1024);
78+
})
79+
.add('io.getUTFString', function() {
80+
io._offset = 0;
81+
io.getUTFString(1024);
82+
})
83+
.add('buf.toString', function() {
84+
buf.toString();
85+
})
86+
.add('getUTF', function() {
87+
getUTF(buf);
88+
})
89+
.add('getUTF2', function() {
90+
getUTF2(buf);
91+
})
92+
.on('cycle', function(event) {
93+
benchmarks.add(event.target);
94+
})
95+
.on('start', function(event) {
96+
console.log('\n Cache Benchmark\n node version: %s, date: %s\n Starting...',
97+
process.version, Date());
98+
})
99+
.on('complete', function done() {
100+
benchmarks.log();
101+
})
102+
.run({ 'async': false });
103+
104+
// Cache Benchmark
105+
// node version: v8.9.1, date: Tue Dec 19 2017 14:45:26 GMT+0800 (CST)
106+
// Starting...
107+
// 5 tests completed.
108+
109+
// io.getRawStringByStringLength x 110,459 ops/sec ±1.40% (88 runs sampled)
110+
// io.getRawStringFast x 709,949 ops/sec ±1.08% (90 runs sampled)
111+
// buf.toString x 3,961,437 ops/sec ±0.99% (90 runs sampled)
112+
// getUTF x 125,213 ops/sec ±1.63% (92 runs sampled)
113+
// getUTF2 x 950,905 ops/sec ±1.20% (94 runs sampled)

benchmark/putString.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
'use strict';
2+
3+
const Benchmark = require('benchmark');
4+
const benchmarks = require('beautify-benchmark');
5+
const assert = require('assert');
6+
7+
const ByteBuffer = require('..');
8+
const io = ByteBuffer.allocate(1024 * 1024);
9+
10+
const str = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234';
11+
12+
io.reset();
13+
const a = io.putRawString(str);
14+
io.reset();
15+
const b = io.putUTFString(str);
16+
17+
assert.deepEqual(a, b);
18+
19+
const suite = new Benchmark.Suite();
20+
suite
21+
.add('putRawString', function() {
22+
io.reset();
23+
io.putRawString(str);
24+
})
25+
.add('putUTFString', function() {
26+
io.reset();
27+
io.putUTFString(str);
28+
})
29+
.on('cycle', function(event) {
30+
benchmarks.add(event.target);
31+
})
32+
.on('start', function(event) {
33+
console.log('\n Cache Benchmark\n node version: %s, date: %s\n Starting...',
34+
process.version, Date());
35+
})
36+
.on('complete', function done() {
37+
benchmarks.log();
38+
})
39+
.run({ 'async': false });
40+

lib/byte.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,14 @@ ByteBuffer.prototype._putString = function (index, value, format) {
342342
return this;
343343
};
344344

345+
/**
 * Append a string to the buffer as UTF-8 bytes by handing the encoded bytes
 * to `this.put`.
 *
 * @param {string} str - text to write; an empty or missing value writes nothing
 * @returns {ByteBuffer} `this` when nothing is written, otherwise whatever
 *   `this.put` returns
 */
ByteBuffer.prototype.putUTFString = function (str) {
  var len = str && str.length;
  if (!len) {
    return this;
  }
  // `new Buffer(string)` is deprecated. Prefer Buffer.from, but only when it
  // is the real Buffer factory: on old runtimes the name resolves to the
  // inherited Uint8Array.from, which does not encode strings.
  var bytes = typeof Buffer.from === 'function' && Buffer.from !== Uint8Array.from ?
    Buffer.from(str, 'utf8') :
    new Buffer(str, 'utf8');
  return this.put(bytes);
};
352+
345353
// Prints a string to the Buffer, encoded as CESU-8
346354
ByteBuffer.prototype.putRawString = function (index, str) {
347355
if (typeof index === 'string') {
@@ -405,6 +413,34 @@ ByteBuffer.prototype._copy = function (start, end) {
405413
return buf;
406414
};
407415

416+
/**
 * Read `length` BYTES starting at the current offset, decode them as UTF-8
 * and advance the offset past them.
 *
 * @param {number} length - number of bytes to consume
 * @returns {string} the decoded text
 * @throws {RangeError} when `length` is not a non-negative number; the offset
 *   is left untouched in that case (previously it became NaN or moved
 *   backwards, breaking every later read)
 */
ByteBuffer.prototype.getUTFString = function (length) {
  if (typeof length !== 'number' || !(length >= 0)) {
    throw new RangeError('length must be a non-negative number');
  }
  var start = this._offset;
  var end = start + length;
  this._offset = end;
  return this._bytes.toString('utf8', start, end);
};
421+
422+
/**
 * Read a string of `length` UTF-16 code units from the current offset,
 * using a word-at-a-time scan to skip the per-byte decoder for ASCII data.
 *
 * The leading bytes are examined four at a time; while a 32-bit word has no
 * high bit set (mask 0x80808080) its four bytes are plain ASCII, i.e. four
 * characters. That verified prefix is decoded in one native `toString` call
 * and whatever remains is delegated to `getRawStringByStringLength`, which is
 * expected to continue from `this._offset` and advance it.
 *
 * @param {number} length - string length in characters (not bytes)
 * @returns {string} the decoded string; '' when `length` is not positive
 */
ByteBuffer.prototype.getRawStringFast = function (length) {
  // Nothing to read; also keeps a negative length from moving the offset back.
  if (!(length > 0)) return '';

  var start = this._offset;
  var numInts = length >> 2;
  // Fewer than four characters: the word scan cannot help.
  if (numInts === 0) return this.getRawStringByStringLength(length);

  // Number of leading bytes proven to be ASCII (always a multiple of four).
  var asciiBytes = numInts * 4;
  for (var i = 0; i < numInts; i++) {
    var pos = i * 4;
    if ((this._bytes.readInt32BE(start + pos) & 0x80808080) !== 0) {
      // Every word before this one is ASCII, so `pos` bytes (not `i`) can
      // take the fast path.
      asciiBytes = pos;
      break;
    }
  }

  this._offset = start + asciiBytes;
  // One ASCII byte is one character, so this many characters are still due.
  var rest = length - asciiBytes;
  var head = asciiBytes ? this._bytes.toString('utf8', start, this._offset) : '';
  return rest ? head + this.getRawStringByStringLength(rest) : head;
};
443+
408444
ByteBuffer.prototype.getRawStringByStringLength = function (index, length) {
409445
var needUpdateOffset = false;
410446
if (arguments.length === 1) {

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
"buffer"
5353
],
5454
"engines": {
55-
"node": ">= 0.10.0"
55+
"node": ">= 0.12.0"
5656
},
5757
"author": "fengmk2 <[email protected]> (http://fengmk2.com)",
5858
"license": "MIT"

test/byte.test.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,19 +467,25 @@ describe('byte.test.js', function () {
467467
assert(bytes.getRawString(0, 11) === 'hello world');
468468
assert(bytes.getRawStringByStringLength(0, 11) === 'hello world');
469469
bytes.position(0);
470+
assert(bytes.getRawStringFast(11) === 'hello world');
471+
bytes.position(0);
472+
assert(bytes.getUTFString(11) === 'hello world');
473+
bytes.position(0);
470474
assert(bytes.getRawString() === 'h');
471475

472476
bytes = ByteBuffer.allocate(1);
473477
bytes.putRawString('你好');
474478
assert(bytes.toString() === '<ByteBuffer e4 bd a0 e5 a5 bd>');
475479
assert(bytes.position(0).readRawString(6) === '你好');
476480
assert(bytes.position(0).getRawString(0, 6) === '你好');
481+
assert(bytes.position(0).getRawStringFast(2) === '你好');
477482
assert(bytes.position(0).getRawStringByStringLength(2) === '你好');
478483
bytes.putRawString(0, '我们');
479484
assert(bytes.toString() === '<ByteBuffer e6 88 91 e4 bb ac>');
480485
assert(bytes.getRawString(0, 6) === '我们');
481486
assert(bytes.getRawStringByStringLength(0, 2) === '我们');
482487
assert(bytes.readRawString(0, 6) === '我们');
488+
assert(bytes.position(0).getRawStringFast(2) === '我们');
483489

484490
bytes = ByteBuffer.allocate(1);
485491
bytes.putRawString('');
@@ -637,13 +643,15 @@ describe('byte.test.js', function () {
637643
assert(bytes.toString() === '<ByteBuffer 68 65 6c 6c 6f e9 a6 83 e5 b0 b2>');
638644
assert.deepEqual(bytes.getRawString(0, 11), str);
639645
assert.deepEqual(bytes.getRawStringByStringLength(0, 7), str);
646+
assert.deepEqual(bytes.position(0).getRawStringFast(7), str);
640647
// gbk
641648
var bytes = ByteBuffer.allocate(1);
642649
var str = 'hello\ud83c\udf3c';
643650
bytes.putRawString(str);
644651
assert(bytes.toString() === '<ByteBuffer 68 65 6c 6c 6f ed a0 bc ed bc bc>');
645652
assert.deepEqual(bytes.getRawString(0, 11), str);
646653
assert.deepEqual(bytes.getRawStringByStringLength(0, 7), str);
654+
assert.deepEqual(bytes.position(0).getRawStringFast(7), str);
647655

648656
var bytes = ByteBuffer.allocate(1);
649657
// java encode bytes: [-19, -96, -67, -19, -72, -128, 87, 119, 119, -23, -126, -93]
@@ -652,6 +660,7 @@ describe('byte.test.js', function () {
652660
assert(bytes.toString() === '<ByteBuffer ed a0 bd ed b8 80 57 77 77 e9 82 a3>');
653661
assert.deepEqual(bytes.getRawString(0, 12), str);
654662
assert.deepEqual(bytes.getRawStringByStringLength(0, 6), str);
663+
assert.deepEqual(bytes.position(0).getRawStringFast(6), str);
655664

656665
// Construction of a special test case which triggers the bug
657666
// of allocating insufficient space via _checkSize
@@ -662,6 +671,17 @@ describe('byte.test.js', function () {
662671
});
663672
});
664673

674+
describe('putUTFString/getUTFString', function () {
675+
it('should put & get utf string ok', function () {
676+
var bytes = ByteBuffer.allocate(1);
677+
var str = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234';
678+
bytes.putUTFString(str);
679+
bytes.flip();
680+
var output = bytes.getUTFString(bytes.limit());
681+
assert(output === str);
682+
});
683+
});
684+
665685
describe('array(), copy()', function () {
666686
it('should copy(start)', function () {
667687
var bytes = ByteBuffer.allocate(8);
@@ -782,8 +802,13 @@ describe('byte.test.js', function () {
782802
bytes.position(0);
783803
assert(str === bytes.getRawStringByStringLength(0, str.length));
784804
assert(bytes.position() === 0);
805+
assert(str === bytes.getRawStringFast(str.length));
806+
bytes.position(0);
785807
assert(str === bytes.getRawStringByStringLength(str.length));
786808
assert(bytes.position() === pos);
809+
bytes.position(0);
810+
assert(str === bytes.getRawStringFast(str.length));
811+
assert(bytes.position() === pos);
787812
});
788813

789814
it('should throw if encode error', function () {
@@ -794,6 +819,10 @@ describe('byte.test.js', function () {
794819
assert.throws(function () {
795820
bytes.getRawStringByStringLength(0, str.length + 1);
796821
}, 'string is not valid UTF-8 encode');
822+
assert.throws(function () {
823+
bytes.position(0)
824+
bytes.getRawStringFast(str.length + 1);
825+
}, 'string is not valid UTF-8 encode');
797826
});
798827
});
799828
});

0 commit comments

Comments
 (0)