diff --git a/benchmark/getRawString.js b/benchmark/getRawString.js
new file mode 100644
index 0000000..5a57a4f
--- /dev/null
+++ b/benchmark/getRawString.js
@@ -0,0 +1,126 @@
+'use strict';
+
+const Benchmark = require('benchmark');
+const benchmarks = require('beautify-benchmark');
+const assert = require('assert');
+
+const ByteBuffer = require('..');
+const io = ByteBuffer.allocate(1024 * 1024);
+
+// 1024 ASCII characters, so the readers below get a long all-ASCII payload.
+const str = '1234567890'.repeat(102) + '1234';
+// const str = 'com.alipay.sofa.service.hsf.service.SofaHSFRequest';
+// const str = '123456789012345678901';
+// const str = '12345678901234567890123456789012345678901234567890';
+io.putRawString(str);
+const buf = io.array();
+const len = buf.length;
+
+// io.position(0);
+// console.log(io.getRawStringByStringLength(1024));
+// console.log(buf.toString());
+
+// io.position(0);
+// assert(io.getRawStringByStringLength(1024) === buf.toString());
+
+// Baseline: decode 1- to 3-byte UTF-8 sequences one byte at a time in plain
+// JavaScript. Any other lead byte throws.
+function getUTF(buf) {
+  const data = [];
+  const length = buf.length;
+  for (let i = 0; i < length; i++) {
+    const ch = buf[i];
+    if (ch < 0x80) {
+      data.push(ch);
+    } else if ((ch & 0xe0) === 0xc0) {
+      const ch1 = buf[++i];
+      const v = ((ch & 0x1f) << 6) + (ch1 & 0x3f);
+      data.push(v);
+    } else if ((ch & 0xf0) === 0xe0) {
+      const ch1 = buf[++i];
+      const ch2 = buf[++i];
+      const v = ((ch & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f);
+      data.push(v);
+    } else {
+      throw new Error('string is not valid UTF-8 encode');
+    }
+  }
+  return String.fromCharCode.apply(String, data);
+}
+
+// assert(getUTF(buf) === buf.toString());
+
+
+// ASCII fast path: test four bytes at a time for a set high bit and throw on
+// the first hit, otherwise decode natively. The `length % 4` tail is unchecked.
+function getUTF2(buf) {
+  const length = buf.length;
+  const numInts = length >> 2;
+  for (let i = 0; i < numInts; i++) {
+    const num = buf.readInt32BE(i * 4);
+    if ((num & 0x80808080) !== 0) {
+      throw new Error();
+    }
+  }
+  return buf.toString('utf8', 0, length);
+}
+
+// assert(getUTF2(buf) === buf.toString());
+// io.position(0);
+// assert(io.getRawStringFast(1024) === buf.toString());
+
+// io.position(0);
+// assert(io.getUTF8String(1024) === buf.toString());
+
+io._offset = 0;
+console.log(io.getRawStringByStringLength(len));
+io._offset = 0;
+console.log(io.getRawStringFast(len));
+io._offset = 0;
+console.log(io.getUTF8String(len));
+
+const suite = new Benchmark.Suite();
+suite
+  .add('io.getRawStringByStringLength', function() {
+    io._offset = 0;
+    io.getRawStringByStringLength(len);
+  })
+  .add('io.getRawStringFast', function() {
+    io._offset = 0;
+    io.getRawStringFast(len);
+  })
+  .add('io.getUTF8String', function() {
+    io._offset = 0;
+    io.getUTF8String(len);
+  })
+  .add('buf.toString', function() {
+    buf.toString();
+  })
+  .add('getUTF', function() {
+    getUTF(buf);
+  })
+  .add('getUTF2', function() {
+    getUTF2(buf);
+  })
+  .on('cycle', function(event) {
+    benchmarks.add(event.target);
+  })
+  .on('start', function(event) {
+    console.log('\n Cache Benchmark\n node version: %s, date: %s\n Starting...',
+      process.version, Date());
+  })
+  .on('complete', function done() {
+    benchmarks.log();
+  })
+  .run({ 'async': false });
+
+// Cache Benchmark
+// node version: v8.9.1, date: Tue Dec 19 2017 14:45:26 GMT+0800 (CST)
+// Starting...
+// 5 tests completed.
+
+// io.getRawStringByStringLength x 110,459 ops/sec ±1.40% (88 runs sampled)
+// io.getRawStringFast x 709,949 ops/sec ±1.08% (90 runs sampled)
+// buf.toString x 3,961,437 ops/sec ±0.99% (90 runs sampled)
+// getUTF x 125,213 ops/sec ±1.63% (92 runs sampled)
+// getUTF2 x 950,905 ops/sec ±1.20% (94 runs sampled)
diff --git a/benchmark/putString.js b/benchmark/putString.js
new file mode 100644
index 0000000..4ec02ee
--- /dev/null
+++ b/benchmark/putString.js
@@ -0,0 +1,41 @@
+'use strict';
+
+const Benchmark = require('benchmark');
+const benchmarks = require('beautify-benchmark');
+const assert = require('assert');
+
+const ByteBuffer = require('..');
+const io = ByteBuffer.allocate(1024 * 1024);
+
+// 1024 ASCII characters, the same payload as benchmark/getRawString.js.
+const str = '1234567890'.repeat(102) + '1234';
+
+io.reset();
+const a = io.putRawString(str);
+io.reset();
+const b = io.putUTF8String(str);
+
+assert.deepEqual(a, b);
+
+const suite = new Benchmark.Suite();
+suite
+  .add('putRawString', function() {
+    io.reset();
+    io.putRawString(str);
+  })
+  .add('putUTF8String', function() {
+    io.reset();
+    io.putUTF8String(str);
+  })
+  .on('cycle', function(event) {
+    benchmarks.add(event.target);
+  })
+  .on('start', function(event) {
+    console.log('\n Cache Benchmark\n node version: %s, date: %s\n Starting...',
+      process.version, Date());
+  })
+  .on('complete', function done() {
+    benchmarks.log();
+  })
+  .run({ 'async': false });
+
diff --git a/lib/byte.js b/lib/byte.js
index 6dcbfea..0643107 100644
--- a/lib/byte.js
+++ b/lib/byte.js
@@ -342,6 +342,21 @@ ByteBuffer.prototype._putString = function (index, value, format) {
   return this;
 };
 
+/**
+ * Write `str` through `put` as a Buffer built with the default (UTF-8)
+ * encoding. Empty, `null` and `undefined` values are ignored.
+ *
+ * @param {String} str - string to write
+ * @return {ByteBuffer} `this` when nothing is written, otherwise what `put` returns
+ */
+ByteBuffer.prototype.putUTF8String = function (str) {
+  var len = str && str.length;
+  if (!len) {
+    return this;
+  }
+  return this.put(new Buffer(str));
+};
+
 // Prints a string to the Buffer, encoded as CESU-8
 ByteBuffer.prototype.putRawString = function (index, str) {
   if (typeof index === 'string') {
@@ -405,6 +420,56 @@ ByteBuffer.prototype._copy = function (start, end) {
   return buf;
 };
 
+/**
+ * Read `length` bytes from the current offset, decode them as UTF-8 and
+ * advance the offset by `length`.
+ *
+ * @param {Number} length - number of bytes to read
+ * @return {String}
+ */
+ByteBuffer.prototype.getUTF8String = function (length) {
+  var start = this._offset;
+  this._offset += length;
+  return this._bytes.toString('utf8', start, this._offset);
+};
+
+/**
+ * Read `length` characters (UTF-16 code units, as counted by
+ * `String#length`) starting at the current offset.
+ *
+ * The bytes are scanned four at a time. While every byte is 7-bit ASCII the
+ * data is decoded natively with `Buffer#toString`; the first word holding a
+ * non-ASCII byte, and the `length % 4` tail, are handed over to
+ * `getRawStringByStringLength`.
+ *
+ * @param {Number} length - number of characters to read
+ * @return {String}
+ */
+ByteBuffer.prototype.getRawStringFast = function (length) {
+  // short string `getRawStringByStringLength` has better performance
+  if (length <= 24) return this.getRawStringByStringLength(length);
+
+  var numInts = length >> 2;
+  var mod = length % 4;
+  var start = this._offset;
+
+  for (var i = 0; i < numInts; i++) {
+    var pos = i * 4;
+    var num = this._bytes.readInt32BE(start + pos);
+    if ((num & 0x80808080) !== 0) {
+      if (pos === 0) return this.getRawStringByStringLength(length);
+      // Every byte before `pos` is ASCII, so `pos` bytes are exactly `pos`
+      // characters; only the remainder needs the slow decoder.
+      this._offset = start + pos;
+      return this._bytes.toString('utf8', start, this._offset) +
+        this.getRawStringByStringLength(length - pos);
+    }
+  }
+  this._offset = start + numInts * 4;
+  var ascii = this._bytes.toString('utf8', start, this._offset);
+  return mod ? ascii + this.getRawStringByStringLength(mod) : ascii;
+};
+
 ByteBuffer.prototype.getRawStringByStringLength = function (index, length) {
   var needUpdateOffset = false;
   if (arguments.length === 1) {
diff --git a/package.json b/package.json
index ede2f38..f1099c2 100644
--- a/package.json
+++ b/package.json
@@ -52,7 +52,7 @@
     "buffer"
   ],
   "engines": {
-    "node": ">= 0.10.0"
+    "node": ">= 0.12.0"
   },
   "author": "fengmk2 (http://fengmk2.com)",
   "license": "MIT"
diff --git a/test/byte.test.js b/test/byte.test.js
index bbbb2ba..ec3f357 100644
--- a/test/byte.test.js
+++ b/test/byte.test.js
@@ -467,6 +467,10 @@ describe('byte.test.js', function () {
       assert(bytes.getRawString(0, 11) === 'hello world');
       assert(bytes.getRawStringByStringLength(0, 11) === 'hello world');
       bytes.position(0);
+      assert(bytes.getRawStringFast(11) === 'hello world');
+      bytes.position(0);
+      assert(bytes.getUTF8String(11) === 'hello world');
+      bytes.position(0);
       assert(bytes.getRawString() === 'h');
 
       bytes = ByteBuffer.allocate(1);
@@ -474,12 +478,14 @@ describe('byte.test.js', function () {
       assert(bytes.toString() === '');
       assert(bytes.position(0).readRawString(6) === '你好');
       assert(bytes.position(0).getRawString(0, 6) === '你好');
+      assert(bytes.position(0).getRawStringFast(2) === '你好');
       assert(bytes.position(0).getRawStringByStringLength(2) === '你好');
       bytes.putRawString(0, '我们');
       assert(bytes.toString() === '');
       assert(bytes.getRawString(0, 6) === '我们');
       assert(bytes.getRawStringByStringLength(0, 2) === '我们');
       assert(bytes.readRawString(0, 6) === '我们');
+      assert(bytes.position(0).getRawStringFast(2) === '我们');
 
       bytes = ByteBuffer.allocate(1);
       bytes.putRawString('');
@@ -637,6 +643,7 @@ describe('byte.test.js', function () {
       assert(bytes.toString() === '');
       assert.deepEqual(bytes.getRawString(0, 11), str);
       assert.deepEqual(bytes.getRawStringByStringLength(0, 7), str);
+      assert.deepEqual(bytes.position(0).getRawStringFast(7), str);
       // gbk
       var bytes = ByteBuffer.allocate(1);
       var str = 'hello\ud83c\udf3c';
@@ -644,6 +651,7 @@ describe('byte.test.js', function () {
       assert(bytes.toString() === '');
       assert.deepEqual(bytes.getRawString(0, 11), str);
       assert.deepEqual(bytes.getRawStringByStringLength(0, 7), str);
+      assert.deepEqual(bytes.position(0).getRawStringFast(7), str);
 
       var bytes = ByteBuffer.allocate(1);
       // java encode bytes: [-19, -96, -67, -19, -72, -128, 87, 119, 119, -23, -126, -93]
@@ -652,6 +660,7 @@ describe('byte.test.js', function () {
       assert(bytes.toString() === '');
       assert.deepEqual(bytes.getRawString(0, 12), str);
       assert.deepEqual(bytes.getRawStringByStringLength(0, 6), str);
+      assert.deepEqual(bytes.position(0).getRawStringFast(6), str);
 
       // Construction of a special test case which triggers the bug
       // of allocating insufficient space via _checkSize
@@ -662,6 +671,17 @@ describe('byte.test.js', function () {
     });
   });
 
+  describe('putUTF8String/getUTF8String', function () {
+    it('should put & get utf string ok', function () {
+      var bytes = ByteBuffer.allocate(1);
+      var str = new Array(103).join('1234567890') + '1234';
+      bytes.putUTF8String(str);
+      bytes.flip();
+      var output = bytes.getUTF8String(bytes.limit());
+      assert(output === str);
+    });
+  });
+
   describe('array(), copy()', function () {
     it('should copy(start)', function () {
       var bytes = ByteBuffer.allocate(8);
@@ -782,8 +802,13 @@ describe('byte.test.js', function () {
       bytes.position(0);
       assert(str === bytes.getRawStringByStringLength(0, str.length));
       assert(bytes.position() === 0);
+      assert(str === bytes.getRawStringFast(str.length));
+      bytes.position(0);
       assert(str === bytes.getRawStringByStringLength(str.length));
       assert(bytes.position() === pos);
+      bytes.position(0);
+      assert(str === bytes.getRawStringFast(str.length));
+      assert(bytes.position() === pos);
     });
 
     it('should throw if encode error', function () {
@@ -794,6 +819,10 @@ describe('byte.test.js', function () {
       assert.throws(function () {
         bytes.getRawStringByStringLength(0, str.length + 1);
       }, 'string is not valid UTF-8 encode');
+      assert.throws(function () {
+        bytes.position(0);
+        bytes.getRawStringFast(str.length + 1);
+      }, 'string is not valid UTF-8 encode');
     });
   });
 });