Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions benchmark/getRawString.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
'use strict';

const Benchmark = require('benchmark');
const benchmarks = require('beautify-benchmark');
const assert = require('assert');

const ByteBuffer = require('..');
const io = ByteBuffer.allocate(1024 * 1024);

const str = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234';
// const str = 'com.alipay.sofa.service.hsf.service.SofaHSFRequest';
// const str = '123456789012345678901';
// const str = '12345678901234567890123456789012345678901234567890';
// Serialize the fixture string into the ByteBuffer once, before any timing starts.
io.putRawString(str);
// Buffer handed to the buffer-level candidates (buf.toString, getUTF, getUTF2).
// NOTE(review): presumably array() returns only the bytes written so far — confirm.
const buf = io.array();
// Byte length of that buffer. It is also passed to the io.get* readers below,
// which is only meaningful here because `str` consists of ASCII digits
// (one byte per character).
const len = buf.length;

// io.position(0);
// console.log(io.getRawStringByStringLength(1024));
// console.log(buf.toString());

// io.position(0);
// assert(io.getRawStringByStringLength(1024) === buf.toString());

/**
 * Decode a buffer of 1- to 3-byte UTF-8 sequences into a string, one code
 * unit per sequence (surrogates are expected to be encoded individually).
 *
 * @param {Buffer|Uint8Array} buf - bytes to decode
 * @returns {string} the decoded string ('' for an empty buffer)
 * @throws {Error} when a lead byte is not a 1/2/3-byte lead, or when a
 *   multi-byte sequence is cut off by the end of the buffer
 */
function getUTF(buf) {
  // Upper bound on arguments per fromCharCode call; spreading an unbounded
  // array through apply() overflows the engine's argument limit.
  const CHUNK = 0x2000;
  const length = buf.length;
  const data = [];

  for (let i = 0; i < length; i++) {
    const ch = buf[i];
    if (ch < 0x80) {
      data.push(ch);
    } else if ((ch & 0xe0) === 0xc0) {
      // Two-byte sequence: the continuation byte must exist.
      if (i + 1 >= length) {
        throw new Error('string is not valid UTF-8 encode');
      }
      const ch1 = buf[++i];
      data.push(((ch & 0x1f) << 6) + (ch1 & 0x3f));
    } else if ((ch & 0xf0) === 0xe0) {
      // Three-byte sequence: both continuation bytes must exist.
      if (i + 2 >= length) {
        throw new Error('string is not valid UTF-8 encode');
      }
      const ch1 = buf[++i];
      const ch2 = buf[++i];
      data.push(((ch & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f));
    } else {
      throw new Error('string is not valid UTF-8 encode');
    }
  }

  // Common case: everything fits in a single call.
  if (data.length <= CHUNK) {
    return String.fromCharCode.apply(String, data);
  }

  let result = '';
  for (let from = 0; from < data.length; from += CHUNK) {
    result += String.fromCharCode.apply(String, data.slice(from, from + CHUNK));
  }
  return result;
}

// assert(getUTF(buf) === buf.toString());


/**
 * ASCII-only fast path: verify that no byte has its high bit set, checking
 * four bytes at a time, then let Buffer#toString decode the whole buffer.
 *
 * @param {Buffer} buf - bytes to decode
 * @returns {string} the decoded string
 * @throws {Error} when any byte of the buffer is outside the ASCII range
 */
function getUTF2(buf) {
  const length = buf.length;
  const numInts = length >> 2;

  // One 32-bit read covers four bytes; 0x80808080 masks each byte's high bit.
  for (let i = 0; i < numInts; i++) {
    if ((buf.readInt32BE(i * 4) & 0x80808080) !== 0) {
      throw new Error('string contains non-ASCII bytes');
    }
  }

  // The last `length % 4` bytes do not fill a whole word; check them singly.
  for (let i = numInts * 4; i < length; i++) {
    if (buf[i] >= 0x80) {
      throw new Error('string contains non-ASCII bytes');
    }
  }

  return buf.toString('utf8', 0, length);
}

// assert(getUTF2(buf) === buf.toString());
// io.position(0);
// assert(io.getRawStringFast(1024) === buf.toString());

// io.position(0);
// assert(io.getUTFString(1024) === buf.toString());

// Sanity run: print what each ByteBuffer reader decodes from the fixture so
// the outputs can be compared by eye before timing. The read offset is reset
// before every call because each reader advances it.
io._offset = 0;
console.log(io.getRawStringByStringLength(len));
io._offset = 0;
console.log(io.getRawStringFast(len));
io._offset = 0;
// getUTF8String is the reader name the library defines.
console.log(io.getUTF8String(len));

// Time every candidate on the same fixture: three ByteBuffer readers, the
// native Buffer#toString, and the two hand-written decoders in this file.
const suite = new Benchmark.Suite();
suite
  .add('io.getRawStringByStringLength', function() {
    io._offset = 0;
    io.getRawStringByStringLength(len);
  })
  .add('io.getRawStringFast', function() {
    io._offset = 0;
    io.getRawStringFast(len);
  })
  .add('io.getUTF8String', function() {
    io._offset = 0;
    io.getUTF8String(len);
  })
  .add('buf.toString', function() {
    buf.toString();
  })
  .add('getUTF', function() {
    getUTF(buf);
  })
  .add('getUTF2', function() {
    getUTF2(buf);
  })
  // Collect each finished case so the results are printed together at the end.
  .on('cycle', function(event) {
    benchmarks.add(event.target);
  })
  .on('start', function() {
    console.log('\n Cache Benchmark\n node version: %s, date: %s\n Starting...',
      process.version, Date());
  })
  .on('complete', function done() {
    benchmarks.log();
  })
  .run({ 'async': false });

// Cache Benchmark
// node version: v8.9.1, date: Tue Dec 19 2017 14:45:26 GMT+0800 (CST)
// Starting...
// 5 tests completed.

// io.getRawStringByStringLength x 110,459 ops/sec ±1.40% (88 runs sampled)
// io.getRawStringFast x 709,949 ops/sec ±1.08% (90 runs sampled)
// buf.toString x 3,961,437 ops/sec ±0.99% (90 runs sampled)
// getUTF x 125,213 ops/sec ±1.63% (92 runs sampled)
// getUTF2 x 950,905 ops/sec ±1.20% (94 runs sampled)
40 changes: 40 additions & 0 deletions benchmark/putString.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
'use strict';

const Benchmark = require('benchmark');
const benchmarks = require('beautify-benchmark');
const assert = require('assert');

const ByteBuffer = require('..');
const io = ByteBuffer.allocate(1024 * 1024);

const str = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234';

// Sanity check before timing: both writers must produce the same bytes for
// the fixture. The written bytes are copied out after each write; comparing
// the return values of the put calls would compare the buffer with itself.
io.reset();
io.putRawString(str);
const a = Buffer.from(io.array());
io.reset();
// putUTF8String is the writer name the library defines.
io.putUTF8String(str);
const b = Buffer.from(io.array());

assert.deepEqual(a, b);

// Time both writers on the same fixture, rewinding the buffer before each write.
const suite = new Benchmark.Suite();
suite
  .add('putRawString', function() {
    io.reset();
    io.putRawString(str);
  })
  .add('putUTF8String', function() {
    io.reset();
    io.putUTF8String(str);
  })
  // Collect each finished case so the results are printed together at the end.
  .on('cycle', function(event) {
    benchmarks.add(event.target);
  })
  .on('start', function() {
    console.log('\n Cache Benchmark\n node version: %s, date: %s\n Starting...',
      process.version, Date());
  })
  .on('complete', function done() {
    benchmarks.log();
  })
  .run({ 'async': false });

39 changes: 39 additions & 0 deletions lib/byte.js
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,14 @@ ByteBuffer.prototype._putString = function (index, value, format) {
return this;
};

/**
 * Append `str` to the buffer as standard UTF-8 bytes.
 *
 * @param {String} str - text to write; empty or missing values write nothing
 * @return {ByteBuffer} `this` when nothing is written, otherwise whatever
 *   `put()` returns
 */
ByteBuffer.prototype.putUTF8String = function (str) {
  var len = str && str.length;
  if (!len) {
    return this;
  }
  // `new Buffer(str)` is deprecated. Prefer Buffer.from, but only when it is
  // the real implementation: on old runtimes it is either missing or merely
  // inherited from Uint8Array.from, which does not encode strings.
  var bytes = typeof Buffer.from === 'function' && Buffer.from !== Uint8Array.from ?
    Buffer.from(str) :
    new Buffer(str);
  return this.put(bytes);
};

// Prints a string to the Buffer, encoded as CESU-8
ByteBuffer.prototype.putRawString = function (index, str) {
if (typeof index === 'string') {
Expand Down Expand Up @@ -405,6 +413,37 @@ ByteBuffer.prototype._copy = function (start, end) {
return buf;
};

/**
 * Read `length` bytes from the current offset, decode them as UTF-8 and
 * move the offset past them.
 *
 * @param {Number} length - number of bytes to consume
 * @return {String} the decoded text
 */
ByteBuffer.prototype.getUTF8String = function (length) {
  var begin = this._offset;
  var end = begin + length;
  this._offset = end;
  return this._bytes.toString('utf8', begin, end);
};

/**
 * Read `length` characters of a raw string from the current offset and
 * advance the offset past the bytes consumed. Produces the same result as
 * `getRawStringByStringLength(length)`, with a fast path for ASCII runs.
 *
 * Rationale, as given in the review of this method:
 *  - For short strings (the measured threshold is about 24 characters)
 *    `getRawStringByStringLength` is the fastest option, so delegate to it.
 *  - For longer strings, scan four bytes at a time. While every byte is
 *    ASCII, one byte is one character, so the whole run can be decoded with
 *    a single `toString` call.
 *  - As soon as a non-ASCII byte is seen, hand the remainder to
 *    `getRawStringByStringLength`: once non-ASCII text shows up, more of it
 *    is likely to follow.
 *  - Callers that know the data cannot contain non-ASCII characters (e.g.
 *    type names read by hessian) can use `getUTF8String` directly.
 *
 * @param {Number} length - number of characters (not bytes) to read
 * @return {String} the decoded string
 */
ByteBuffer.prototype.getRawStringFast = function (length) {
  // short string `getRawStringByStringLength` has better performance
  if (length <= 24) return this.getRawStringByStringLength(length);

  var start = this._offset;
  var numInts = length >> 2;
  var tail = length % 4;

  for (var i = 0; i < numInts; i++) {
    var pos = i * 4;
    // 0x80808080 masks the high bit of each of the four bytes in the word.
    if ((this._bytes.readInt32BE(start + pos) & 0x80808080) !== 0) {
      if (pos === 0) return this.getRawStringByStringLength(length);
      // Every byte before `pos` was verified as ASCII, i.e. `pos` characters.
      this._offset = start + pos;
      return this._bytes.toString('utf8', start, this._offset) +
        this.getRawStringByStringLength(length - pos);
    }
  }

  // All whole words were ASCII; at most three characters remain.
  this._offset = start + numInts * 4;
  var head = this._bytes.toString('utf8', start, this._offset);
  return tail ? head + this.getRawStringByStringLength(tail) : head;
};

ByteBuffer.prototype.getRawStringByStringLength = function (index, length) {
var needUpdateOffset = false;
if (arguments.length === 1) {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"buffer"
],
"engines": {
"node": ">= 0.10.0"
"node": ">= 0.12.0"
},
"author": "fengmk2 <[email protected]> (http://fengmk2.com)",
"license": "MIT"
Expand Down
29 changes: 29 additions & 0 deletions test/byte.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -467,19 +467,25 @@ describe('byte.test.js', function () {
assert(bytes.getRawString(0, 11) === 'hello world');
assert(bytes.getRawStringByStringLength(0, 11) === 'hello world');
bytes.position(0);
assert(bytes.getRawStringFast(11) === 'hello world');
bytes.position(0);
assert(bytes.getUTF8String(11) === 'hello world');
bytes.position(0);
assert(bytes.getRawString() === 'h');

bytes = ByteBuffer.allocate(1);
bytes.putRawString('你好');
assert(bytes.toString() === '<ByteBuffer e4 bd a0 e5 a5 bd>');
assert(bytes.position(0).readRawString(6) === '你好');
assert(bytes.position(0).getRawString(0, 6) === '你好');
assert(bytes.position(0).getRawStringFast(2) === '你好');
assert(bytes.position(0).getRawStringByStringLength(2) === '你好');
bytes.putRawString(0, '我们');
assert(bytes.toString() === '<ByteBuffer e6 88 91 e4 bb ac>');
assert(bytes.getRawString(0, 6) === '我们');
assert(bytes.getRawStringByStringLength(0, 2) === '我们');
assert(bytes.readRawString(0, 6) === '我们');
assert(bytes.position(0).getRawStringFast(2) === '我们');

bytes = ByteBuffer.allocate(1);
bytes.putRawString('');
Expand Down Expand Up @@ -637,13 +643,15 @@ describe('byte.test.js', function () {
assert(bytes.toString() === '<ByteBuffer 68 65 6c 6c 6f e9 a6 83 e5 b0 b2>');
assert.deepEqual(bytes.getRawString(0, 11), str);
assert.deepEqual(bytes.getRawStringByStringLength(0, 7), str);
assert.deepEqual(bytes.position(0).getRawStringFast(7), str);
// gbk
var bytes = ByteBuffer.allocate(1);
var str = 'hello\ud83c\udf3c';
bytes.putRawString(str);
assert(bytes.toString() === '<ByteBuffer 68 65 6c 6c 6f ed a0 bc ed bc bc>');
assert.deepEqual(bytes.getRawString(0, 11), str);
assert.deepEqual(bytes.getRawStringByStringLength(0, 7), str);
assert.deepEqual(bytes.position(0).getRawStringFast(7), str);

var bytes = ByteBuffer.allocate(1);
// java encode bytes: [-19, -96, -67, -19, -72, -128, 87, 119, 119, -23, -126, -93]
Expand All @@ -652,6 +660,7 @@ describe('byte.test.js', function () {
assert(bytes.toString() === '<ByteBuffer ed a0 bd ed b8 80 57 77 77 e9 82 a3>');
assert.deepEqual(bytes.getRawString(0, 12), str);
assert.deepEqual(bytes.getRawStringByStringLength(0, 6), str);
assert.deepEqual(bytes.position(0).getRawStringFast(6), str);

// Construction of a special test case which triggers the bug
// of allocating insufficient space via _checkSize
Expand All @@ -662,6 +671,17 @@ describe('byte.test.js', function () {
});
});

describe('putUTF8String/getUTF8String', function () {
  // Write `str` into a fresh one-byte buffer (so it has to grow), flip it,
  // and read back everything up to the limit.
  function roundTrip(str) {
    var bytes = ByteBuffer.allocate(1);
    bytes.putUTF8String(str);
    bytes.flip();
    return bytes.getUTF8String(bytes.limit());
  }

  it('should put & get utf string ok', function () {
    // Long ASCII fixture: '1234567890' x 102 followed by '1234' (1024 chars).
    var str = new Array(103).join('1234567890') + '1234';
    assert(roundTrip(str) === str);
  });

  it('should put & get multi-byte utf8 string ok', function () {
    var str = 'hello 你好, 世界';
    assert(roundTrip(str) === str);
  });

  it('should get empty string when nothing was written', function () {
    assert(roundTrip('') === '');
  });
});

describe('array(), copy()', function () {
it('should copy(start)', function () {
var bytes = ByteBuffer.allocate(8);
Expand Down Expand Up @@ -782,8 +802,13 @@ describe('byte.test.js', function () {
bytes.position(0);
assert(str === bytes.getRawStringByStringLength(0, str.length));
assert(bytes.position() === 0);
assert(str === bytes.getRawStringFast(str.length));
bytes.position(0);
assert(str === bytes.getRawStringByStringLength(str.length));
assert(bytes.position() === pos);
bytes.position(0);
assert(str === bytes.getRawStringFast(str.length));
assert(bytes.position() === pos);
});

it('should throw if encode error', function () {
Expand All @@ -794,6 +819,10 @@ describe('byte.test.js', function () {
assert.throws(function () {
bytes.getRawStringByStringLength(0, str.length + 1);
}, 'string is not valid UTF-8 encode');
assert.throws(function () {
bytes.position(0)
bytes.getRawStringFast(str.length + 1);
}, 'string is not valid UTF-8 encode');
});
});
});