Skip to content

Commit 2a052c5

Browse files
authored
feat: improve putRawString performance (#27)
Avoid calling Buffer.byteLength(str); trade extra memory for better performance. Result: 10000 * 10 putRawString calls went from 869ms down to 489ms.
1 parent 648867b commit 2a052c5

File tree

4 files changed

+239
-37
lines changed

4 files changed

+239
-37
lines changed

benchmark/putRawString.js

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
'use strict';
2+
3+
var bench = require('fastbench');
4+
var ByteBuffer = require('..');
5+
6+
var largeStr = JSON.stringify(require('../package.json'))
7+
largeStr += largeStr
8+
largeStr += largeStr
9+
10+
var bb = ByteBuffer.allocate(1024);
11+
var max = 10;
12+
13+
bb.putRawString(makeStr('a', 200));
14+
console.log('small bytes %s', bb.array().length);
15+
bb.reset();
16+
17+
bb.putRawString(makeStr(largeStr, 10));
18+
console.log('large bytes %s', bb.array().length);
19+
bb.reset();
20+
21+
var run = bench([
22+
function putRawStringSmall(cb) {
23+
for (var i = 0; i < max; i++) {
24+
bb.putRawString(makeStr('a', 200));
25+
}
26+
bb.array();
27+
bb.reset();
28+
setImmediate(cb);
29+
},
30+
// function putRawStringLarge(cb) {
31+
// for (var i = 0; i < max; i++) {
32+
// bb.putRawString(makeStr(largeStr, 10));
33+
// }
34+
// bb.array();
35+
// bb.reset();
36+
// setImmediate(cb);
37+
// },
38+
], 10000);
39+
40+
run(run);
41+
42+
function makeStr(str, concats) {
43+
var s = ''
44+
while (concats--) {
45+
s += str
46+
}
47+
return s
48+
}
49+
50+
// before:
51+
// putRawStringSmall*10000: 912.743ms
52+
// putRawStringSmall*10000: 869.517ms
53+
54+
// after:
55+
// putRawStringSmall*10000: 502.805ms
56+
// putRawStringSmall*10000: 489.996ms

lib/byte.js

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,4 @@
1-
/*!
2-
* byte - lib/byte.js
3-
*
4-
* Copyright(c) 2013 - 2014
5-
* MIT Licensed
6-
*
7-
* Authors:
8-
* fengmk2 <[email protected]> (http://fengmk2.github.com)
9-
* dead-horse <[email protected]> (https://github.com/dead-horse)
10-
*/
11-
12-
"use strict";
13-
14-
/**
15-
* Module dependencies.
16-
*/
1+
'use strict';
172

183
var Long = require('long');
194
var debug = require('debug')('byte');
@@ -24,6 +9,7 @@ var DEFAULT_SIZE = 1024;
249
var BIG_ENDIAN = 1;
2510
var LITTLE_ENDIAN = 2;
2611
var MAX_INT_31 = Math.pow(2, 31);
12+
var ONE_HUNDRED_MB = 100 * 1024 * 1024;
2713

2814
function ByteBuffer(options) {
2915
options = options || {};
@@ -71,7 +57,12 @@ ByteBuffer.prototype._checkSize = function (afterSize) {
7157
this._size = afterSize * 2;
7258
this._limit = this._size;
7359
debug('allocate new Buffer: from %d to %d bytes', old, this._size);
74-
var bytes = new Buffer(this._size);
60+
var bytes;
61+
if (Buffer.allocUnsafe) {
62+
bytes = Buffer.allocUnsafe(this._size);
63+
} else {
64+
bytes = new Buffer(this._size);
65+
}
7566
this._bytes.copy(bytes, 0);
7667
this._bytes = bytes;
7768
};
@@ -359,17 +350,37 @@ ByteBuffer.prototype.putRawString = function (index, str) {
359350
index = this._offset;
360351
// Note that an UTF-8 encoder will encode a character that is outside BMP
361352
// as 4 bytes, yet a CESU-8 encoder will encode as 6 bytes, ergo 6 / 4 = 1.5
362-
this._checkSize(this._offset + Math.ceil(Buffer.byteLength(str) * 1.5));
353+
// @see https://en.wikipedia.org/wiki/CESU-8
354+
// this._checkSize(this._offset + Math.ceil(Buffer.byteLength(str) * 1.5));
355+
356+
// trade extra memory for better performance
357+
// each UTF-16 code unit (one `str.length` unit) encodes to at most 3 bytes in CESU-8
358+
var maxIncreaseSize = str.length * 3;
359+
if (maxIncreaseSize > ONE_HUNDRED_MB) {
360+
maxIncreaseSize = Math.ceil(Buffer.byteLength(str) * 1.5);
361+
}
362+
this._checkSize(this._offset + maxIncreaseSize);
363363
}
364364

365-
if (!str || str.length === 0) {
365+
// CESU-8 Bit Distribution
366+
// @see http://www.unicode.org/reports/tr26/
367+
//
368+
// UTF-16 Code Unit | 1st Byte | 2nd Byte | 3rd Byte
369+
// 000000000xxxxxxx (0x0000 ~ 0x007f) | 0xxxxxxx (0x00 ~ 0x7f) | |
370+
// 00000yyyyyxxxxxx (0x0080 ~ 0x07ff) | 110yyyyy (0xc0 ~ 0xdf) | 10xxxxxx (0x80 ~ 0xbf) |
371+
// zzzzyyyyyyxxxxxx (0x0800 ~ 0xffff) | 1110zzzz (0xe0 ~ 0xef) | 10yyyyyy (0x80 ~ 0xbf) | 10xxxxxx (0x80 ~ 0xbf)
372+
373+
var len = str && str.length;
374+
if (!len) {
366375
return this;
367376
}
368-
for (var i = 0, len = str.length; i < len; i++) {
377+
for (var i = 0; i < len; i++) {
369378
var ch = str.charCodeAt(i);
379+
// 0x80: 128
370380
if (ch < 0x80) {
371-
this._bytes[index++] = ch >>> 32;
381+
this._bytes[index++] = ch;
372382
} else if (ch < 0x800) {
383+
// 0x800: 2048
373384
this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32;
374385
this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
375386
} else {

package.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
"description": "Input Buffer and Output Buffer, just like Java ByteBuffer",
55
"main": "lib/byte.js",
66
"files": [
7-
"lib/"
7+
"lib"
88
],
99
"scripts": {
10-
"test": "mocha -R spec -t 5000 test/*.test.js",
10+
"test": "mocha --require intelli-espower-loader -R spec -t 5000 test/*.test.js",
1111
"test-cov": "node node_modules/.bin/istanbul cover node_modules/.bin/_mocha -- -t 5000 test/*.test.js",
1212
"test-travis": "node node_modules/.bin/istanbul cover node_modules/.bin/_mocha --report lcovonly -- -t 5000 test/*.test.js",
1313
"jshint": "jshint .",
@@ -26,10 +26,13 @@
2626
"beautify-benchmark": "0",
2727
"benchmark": "1",
2828
"contributors": "*",
29+
"fastbench": "^1.0.1",
30+
"intelli-espower-loader": "^1.0.1",
2931
"istanbul": "*",
3032
"jshint": "*",
3133
"mocha": "*",
32-
"optimized": "^1.2.0"
34+
"optimized": "^1.2.0",
35+
"power-assert": "^1.4.4"
3336
},
3437
"homepage": "https://github.com/node-modules/byte",
3538
"repository": {

test/byte.test.js

Lines changed: 145 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,4 @@
1-
/*!
2-
* byte - test/byte.test.js
3-
*
4-
* Copyright(c) 2013 - 2014
5-
* MIT Licensed
6-
*
7-
* Authors:
8-
* fengmk2 <[email protected]> (http://fengmk2.github.com)
9-
* dead-horse <[email protected]> (https://github.com/dead-horse)
10-
*/
11-
12-
"use strict";
1+
'use strict';
132

143
var Long = require('long');
154
var assert = require('assert');
@@ -494,6 +483,149 @@ describe('byte.test.js', function () {
494483
assert(bytes.toString() === '<ByteBuffer>');
495484
});
496485

486+
it('should 000000000xxxxxxx (0x0000 ~ 0x007f) => 0xxxxxxx (0x00 ~ 0x7f)', function() {
487+
// UTF-8
488+
var bytes = ByteBuffer.allocate(1);
489+
bytes.putString(String.fromCharCode(0x0000));
490+
assert(bytes.toString() === '<ByteBuffer 00 00 00 01 00>');
491+
// CESU-8
492+
bytes = ByteBuffer.allocate(1);
493+
bytes.putRawString(String.fromCharCode(0x0000));
494+
assert(bytes.toString() === '<ByteBuffer 00>');
495+
496+
// UTF-8
497+
bytes = ByteBuffer.allocate(1);
498+
bytes.putString(String.fromCharCode(0x0001));
499+
assert(bytes.toString() === '<ByteBuffer 00 00 00 01 01>');
500+
// CESU-8
501+
bytes = ByteBuffer.allocate(1);
502+
bytes.putRawString(String.fromCharCode(0x0001));
503+
assert(bytes.toString() === '<ByteBuffer 01>');
504+
505+
// UTF-8
506+
bytes = ByteBuffer.allocate(1);
507+
bytes.putString('E'); // 0x45
508+
assert(bytes.toString() === '<ByteBuffer 00 00 00 01 45>');
509+
// CESU-8
510+
bytes = ByteBuffer.allocate(1);
511+
bytes.putRawString('E');
512+
assert(bytes.toString() === '<ByteBuffer 45>');
513+
514+
// UTF-8
515+
bytes = ByteBuffer.allocate(1);
516+
bytes.putString(String.fromCharCode(0x7F));
517+
assert(bytes.toString() === '<ByteBuffer 00 00 00 01 7f>');
518+
// CESU-8
519+
bytes = ByteBuffer.allocate(1);
520+
bytes.putRawString(String.fromCharCode(0x7F));
521+
assert(bytes.toString() === '<ByteBuffer 7f>');
522+
});
523+
524+
it('should 00000yyyyyxxxxxx (0x0080 ~ 0x07ff) => 110yyyyy (0xc0 ~ 0xdf) | 10xxxxxx (0x80 ~ 0xbf)', function() {
525+
// UTF-8
526+
var bytes = ByteBuffer.allocate(1);
527+
bytes = ByteBuffer.allocate(1);
528+
bytes.putString(String.fromCharCode(0x80));
529+
assert(bytes.toString() === '<ByteBuffer 00 00 00 02 c2 80>');
530+
// CESU-8
531+
bytes = ByteBuffer.allocate(1);
532+
bytes.putRawString(String.fromCharCode(0x80));
533+
assert(bytes.toString() === '<ByteBuffer c2 80>');
534+
535+
// UTF-8
536+
bytes = ByteBuffer.allocate(1);
537+
bytes.putString('ȅ'); // 0x0205: 517
538+
assert(bytes.toString() === '<ByteBuffer 00 00 00 02 c8 85>');
539+
// CESU-8
540+
bytes = ByteBuffer.allocate(1);
541+
bytes.putRawString('ȅ');
542+
assert(bytes.toString() === '<ByteBuffer c8 85>');
543+
544+
// UTF-8
545+
bytes = ByteBuffer.allocate(1);
546+
bytes.putString(String.fromCharCode(0x81));
547+
assert(bytes.toString() === '<ByteBuffer 00 00 00 02 c2 81>');
548+
// CESU-8
549+
bytes = ByteBuffer.allocate(1);
550+
bytes.putRawString(String.fromCharCode(0x81));
551+
assert(bytes.toString() === '<ByteBuffer c2 81>');
552+
553+
// UTF-8
554+
bytes = ByteBuffer.allocate(1);
555+
bytes.putString(String.fromCharCode(0x7FE));
556+
assert(bytes.toString() === '<ByteBuffer 00 00 00 02 df be>');
557+
// CESU-8
558+
bytes = ByteBuffer.allocate(1);
559+
bytes.putRawString(String.fromCharCode(0x7FE));
560+
assert(bytes.toString() === '<ByteBuffer df be>');
561+
562+
// UTF-8
563+
bytes = ByteBuffer.allocate(1);
564+
bytes.putString(String.fromCharCode(0x7FF));
565+
assert(bytes.toString() === '<ByteBuffer 00 00 00 02 df bf>');
566+
// CESU-8
567+
bytes = ByteBuffer.allocate(1);
568+
bytes.putRawString(String.fromCharCode(0x7FF));
569+
assert(bytes.toString() === '<ByteBuffer df bf>');
570+
});
571+
572+
it('should zzzzyyyyyyxxxxxx (0x0800 ~ 0xffff) => 1110zzzz (0xe0 ~ 0xef) | 10yyyyyy (0x80 ~ 0xbf) | 10xxxxxx (0x80 ~ 0xbf)', function() {
573+
// UTF-8
574+
var bytes = ByteBuffer.allocate(1);
575+
bytes = ByteBuffer.allocate(1);
576+
bytes.putString(String.fromCharCode(0x800));
577+
assert(bytes.toString() === '<ByteBuffer 00 00 00 03 e0 a0 80>');
578+
// CESU-8
579+
bytes = ByteBuffer.allocate(1);
580+
bytes.putRawString(String.fromCharCode(0x800));
581+
assert(bytes.toString() === '<ByteBuffer e0 a0 80>');
582+
583+
// UTF-8
584+
bytes = ByteBuffer.allocate(1);
585+
bytes.putString(String.fromCharCode(0x801));
586+
assert(bytes.toString() === '<ByteBuffer 00 00 00 03 e0 a0 81>');
587+
// CESU-8
588+
bytes = ByteBuffer.allocate(1);
589+
bytes.putRawString(String.fromCharCode(0x801));
590+
assert(bytes.toString() === '<ByteBuffer e0 a0 81>');
591+
592+
// UTF-8
593+
bytes = ByteBuffer.allocate(1);
594+
bytes.putString('𐐀'); // 0xD801 0xDC00
595+
assert(bytes.toString() === '<ByteBuffer 00 00 00 04 f0 90 90 80>');
596+
// CESU-8
597+
bytes = ByteBuffer.allocate(1);
598+
bytes.putRawString('𐐀');
599+
assert(bytes.toString() === '<ByteBuffer ed a0 81 ed b0 80>');
600+
601+
// UTF-8
602+
bytes = ByteBuffer.allocate(1);
603+
bytes.putString('\ud801\udc01'); // 0xD801 0xDC01
604+
assert(bytes.toString() === '<ByteBuffer 00 00 00 04 f0 90 90 81>');
605+
// CESU-8
606+
bytes = ByteBuffer.allocate(1);
607+
bytes.putRawString('\ud801\udc01');
608+
assert(bytes.toString() === '<ByteBuffer ed a0 81 ed b0 81>');
609+
610+
// UTF-8
611+
bytes = ByteBuffer.allocate(1);
612+
bytes.putString(String.fromCharCode(0xFFFE));
613+
assert(bytes.toString() === '<ByteBuffer 00 00 00 03 ef bf be>');
614+
// CESU-8
615+
bytes = ByteBuffer.allocate(1);
616+
bytes.putRawString(String.fromCharCode(0xFFFE));
617+
assert(bytes.toString() === '<ByteBuffer ef bf be>');
618+
619+
// UTF-8
620+
bytes = ByteBuffer.allocate(1);
621+
bytes.putString(String.fromCharCode(0xFFFF));
622+
assert(bytes.toString() === '<ByteBuffer 00 00 00 03 ef bf bf>');
623+
// CESU-8
624+
bytes = ByteBuffer.allocate(1);
625+
bytes.putRawString(String.fromCharCode(0xFFFF));
626+
assert(bytes.toString() === '<ByteBuffer ef bf bf>');
627+
});
628+
497629
it('should put emoji', function () {
498630
// utf8
499631
var bytes = ByteBuffer.allocate(1);
@@ -514,7 +646,7 @@ describe('byte.test.js', function () {
514646
bytes.putRawString(str);
515647
assert(bytes.toString() === '<ByteBuffer ed a0 bd ed b8 80 57 77 77 e9 82 a3>');
516648
assert.deepEqual(bytes.getRawString(0, 12), str);
517-
649+
518650
// Construction of a special test case which triggers the bug
519651
// of allocating insufficient space via _checkSize
520652
var bytes = ByteBuffer.allocate(4);

0 commit comments

Comments
 (0)