diff --git a/.gitignore b/.gitignore index c0a6490..17d1e6c 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ results node_modules npm-debug.log coverage/ +profile-* diff --git a/.travis.yml b/.travis.yml index 5993a31..6b284b7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,9 @@ +sudo: false language: node_js node_js: - '0.12' - - '1' - - '2' - - '3' - '4' -script: "npm run test-travis" + - '6' + - '8' +script: "npm run test-travis && npm run benchmark" after_script: "npm install coveralls@2 && cat ./coverage/lcov.info | coveralls" diff --git a/benchmark/putRawString.js b/benchmark/putRawString.js index 9c328de..d91acc0 100644 --- a/benchmark/putRawString.js +++ b/benchmark/putRawString.js @@ -10,16 +10,85 @@ largeStr += largeStr var bb = ByteBuffer.allocate(1024); var max = 10; +// putRawString + bb.putRawString(makeStr('a', 200)); -console.log('small bytes %s', bb.array().length); +console.log('putRawString: small < 0x80 bytes %s, one char length: %d', + bb.array().length, 'a'.length); +bb.reset(); + +bb.putRawString(makeStr('Θ…', 200)); +console.log('putRawString: small < 0x800 bytes %s, one char length: %d', + bb.array().length, 'Θ…'.length); +bb.reset(); + +bb.putRawString(makeStr('𐐀', 200)); +console.log('putRawString: small >= 0x800 bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d', + bb.array().length, '𐐀'.length, makeStr('𐐀', 200).length * 3, bb._size); +bb.reset(); + +bb.putRawString(makeStr(String.fromCharCode(0x801), 200)); +console.log('putRawString: small = 0x801 bytes %s, one char length: %d', + bb.array().length, String.fromCharCode(0x801).length); +bb.reset(); + +bb.putRawString(makeStr('δΈ­ζ–‡', 200)); +console.log('putRawString: small δΈ­ζ–‡ bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d', + bb.array().length, 'δΈ­ζ–‡'.length, makeStr('δΈ­ζ–‡', 200).length * 3, bb._size); +bb.reset(); + +bb.putRawString(makeStr('\ud83c\udf3c', 200)); +console.log('putRawString: small \ud83c\udf3c bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d', + bb.array().length, '\ud83c\udf3c'.length, makeStr('\ud83c\udf3c', 200).length * 3, bb._size); bb.reset(); bb.putRawString(makeStr(largeStr, 10)); -console.log('large bytes %s', bb.array().length); +console.log('putRawString: large bytes %s, one char length: %d', + bb.array().length); bb.reset(); +// putUTF8RawString + +bb = ByteBuffer.allocate(2); +bb.putUTF8RawString(makeStr('a', 200)); +console.log('putUTF8RawString: small < 0x80 bytes %s, one char length: %d', + bb.array().length, 'a'.length); +bb.reset(); + +bb.putUTF8RawString(makeStr('Θ…', 200)); +console.log('putUTF8RawString: small < 0x800 bytes %s, one char length: %d', + bb.array().length, 'Θ…'.length); +bb.reset(); + +bb.putUTF8RawString(makeStr('𐐀', 200)); +console.log('putUTF8RawString: small >= 0x800 bytes %s, one char length: %d, byteLength: %d, bb.size: %d', + bb.array().length, '𐐀'.length, Buffer.byteLength(makeStr('𐐀', 200)), bb._size); +bb.reset(); + +bb.putUTF8RawString(makeStr('δΈ­ζ–‡', 200)); +console.log('putUTF8RawString: small δΈ­ζ–‡ bytes %s, one char length: %d, byteLength: %d, bb.size: %d', + bb.array().length, 'δΈ­ζ–‡'.length, Buffer.byteLength(makeStr('δΈ­ζ–‡', 200)), bb._size); +bb.reset(); + +bb.putUTF8RawString(makeStr('\ud83c\udf3c', 200)); +console.log('putUTF8RawString: small \ud83c\udf3c bytes %s, one char length: %d, byteLength: %d, bb.size: %d', + bb.array().length, '\ud83c\udf3c'.length, Buffer.byteLength(makeStr('\ud83c\udf3c', 200)), bb._size); +bb.reset(); + +bb.putUTF8RawString(makeStr(String.fromCharCode(0x801), 200)); +console.log('putUTF8RawString: small = 0x801 bytes %s, one char length: %d', + bb.array().length, String.fromCharCode(0x801).length); +bb.reset(); + +bb.putUTF8RawString(makeStr(largeStr, 10)); +console.log('putUTF8RawString: large bytes %s, one char length: %d', + bb.array().length); +bb.reset(); + +bb = ByteBuffer.allocate(1024); + var run = bench([ - function putRawStringSmall(cb) { + function putRawStringSmallLessThan0x80(cb) { for (var i = 0; i < max; i++) { bb.putRawString(makeStr('a', 200)); } @@ -27,6 +96,79 @@ var run = bench([ bb.reset(); setImmediate(cb); }, + function putRawStringSmallLessThan0x800(cb) { + for (var i = 0; i < max; i++) { + bb.putRawString(makeStr('Θ…', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + function putRawStringSmallBiggerThan0x800(cb) { + for (var i = 0; i < max; i++) { + bb.putRawString(makeStr('𐐀', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + function putRawStringSmallChinese(cb) { + for (var i = 0; i < max; i++) { + bb.putRawString(makeStr('δΈ­ζ–‡', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + function putRawStringSmallEmoji(cb) { + for (var i = 0; i < max; i++) { + bb.putRawString(makeStr('\ud83c\udf3c', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + + function putUTF8RawStringSmallLessThan0x80(cb) { + for (var i = 0; i < max; i++) { + bb.putUTF8RawString(makeStr('a', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + function putUTF8RawStringSmallLessThan0x800(cb) { + for (var i = 0; i < max; i++) { + bb.putUTF8RawString(makeStr('Θ…', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + function putUTF8RawStringSmallBiggerThan0x800(cb) { + for (var i = 0; i < max; i++) { + bb.putUTF8RawString(makeStr('𐐀', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + function putUTF8RawStringSmallChinese(cb) { + for (var i = 0; i < max; i++) { + bb.putUTF8RawString(makeStr('δΈ­ζ–‡', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, + function putUTF8RawStringSmallEmoji(cb) { + for (var i = 0; i < max; i++) { + bb.putUTF8RawString(makeStr('\ud83c\udf3c', 200)); + } + bb.array(); + bb.reset(); + setImmediate(cb); + }, // function putRawStringLarge(cb) { // for (var i = 0; i < max; i++) { // bb.putRawString(makeStr(largeStr, 10)); diff --git a/lib/byte.js b/lib/byte.js index f29e6cf..b14c963 100644 --- a/lib/byte.js +++ b/lib/byte.js @@ -344,7 +344,7 @@ ByteBuffer.prototype._putString = function (index, value, format) { // Prints a string to the Buffer, encoded as CESU-8 ByteBuffer.prototype.putRawString = function (index, str) { - if (typeof index === 'string') { + if (arguments.length === 1) { // putRawString(str) str = index; index = this._offset; @@ -381,12 +381,17 @@ ByteBuffer.prototype.putRawString = function (index, str) { this._bytes[index++] = ch; } else if (ch < 0x800) { // 0x800: 2048 - this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32; - this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32; + // this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32; + // this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32; + this._bytes[index++] = (ch >>> 6) | 0xc0; + this._bytes[index++] = (ch & 0x3f) | 0x80; // 0x3f => 0b00111111 } else { - this._bytes[index++] = (0xe0 + ((ch >> 12) & 0xf)) >>> 32; - this._bytes[index++] = (0x80 + ((ch >> 6) & 0x3f)) >>> 32; - this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32; + // this._bytes[index++] = (0xe0 + ((ch >> 12) & 0xf)) >>> 32; + // this._bytes[index++] = (0x80 + ((ch >> 6) & 0x3f)) >>> 32; + // this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32; + this._bytes[index++] = (ch >>> 12) | 0xe0; + this._bytes[index++] = ((ch >>> 6) & 0x3f) | 0x80; + this._bytes[index++] = (ch & 0x3f) | 0x80; } } // index is now probably less than @_offset and reflects the real length @@ -394,6 +399,24 @@ ByteBuffer.prototype.putRawString = function (index, str) { return this; }; +ByteBuffer.prototype.putUTF8RawString = function (index, str) { + var buf; + if (arguments.length === 1) { + // putUTF8RawString(str) + str = index; + index = this._offset; + buf = Buffer.from ? Buffer.from(str) : new Buffer(str); + this._checkSize(this._offset + buf.length); + buf.copy(this._bytes, index); + } else { + buf = Buffer.from ? Buffer.from(str) : new Buffer(str); + buf.copy(this._bytes, index); + } + + this._offset = index + buf.length; + return this; +}; + ByteBuffer.prototype._copy = function (start, end) { // magic number here.. // @see benchmark/buffer_slice_and_copy.js diff --git a/package.json b/package.json index 95af16e..ef79dc0 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,8 @@ "autod": "autod -w --prefix '^' -e benchmark && npm run cnpm", "cnpm": "npm install --registry=https://registry.npm.taobao.org", "contributors": "contributors -f plain -o AUTHORS", - "optimized": "node --allow-natives-syntax --trace_opt --trace_deopt test/optimized.js" + "optimized": "node --allow-natives-syntax --trace_opt --trace_deopt test/optimized.js", + "benchmark": "node benchmark/putRawString.js" }, "dependencies": { "debug": "^2.6.6", @@ -56,4 +57,4 @@ }, "author": "fengmk2 (http://fengmk2.com)", "license": "MIT" -} \ No newline at end of file +} diff --git a/test/byte.test.js b/test/byte.test.js index d3cb1ba..ef73de3 100644 --- a/test/byte.test.js +++ b/test/byte.test.js @@ -458,7 +458,7 @@ describe('byte.test.js', function () { }); }); - describe('putRawString()', function () { + describe('putRawString(), putUTF8RawString()', function () { it('should put raw string', function () { var bytes = ByteBuffer.allocate(1); bytes.putRawString('hello'); @@ -468,6 +468,11 @@ describe('byte.test.js', function () { bytes.position(0); assert(bytes.getRawString() === 'h'); + bytes = ByteBuffer.allocate(1); + bytes.putUTF8RawString('hello'); + bytes.putUTF8RawString(' world'); + assert(bytes.toString() === ''); + bytes = ByteBuffer.allocate(1); bytes.putRawString('δ½ ε₯½'); assert(bytes.toString() === ''); @@ -475,9 +480,14 @@ describe('byte.test.js', function () { bytes.putRawString(0, 'ζˆ‘δ»¬'); assert(bytes.toString() === ''); assert(bytes.getRawString(0, 6) === 'ζˆ‘δ»¬'); - assert(bytes.readRawString(0, 6) === 'ζˆ‘δ»¬'); + bytes = ByteBuffer.allocate(1); + bytes.putUTF8RawString('δ½ ε₯½'); + assert(bytes.toString() === ''); + bytes.putUTF8RawString(0, 'ζˆ‘δ»¬'); + assert(bytes.toString() === ''); + bytes = ByteBuffer.allocate(1); bytes.putRawString(''); assert(bytes.toString() === ''); @@ -486,8 +496,8 @@ describe('byte.test.js', function () { it('should 000000000xxxxxxx (0x0000 ~ 0x007f) => 0xxxxxxx (0x00 ~ 0x7f)', function() { // UTF-8 var bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x0000)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x0000)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x0000)); @@ -495,8 +505,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x0001)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x0001)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x0001)); @@ -504,8 +514,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString('E'); // 0x45 - assert(bytes.toString() === ''); + bytes.putUTF8RawString('E'); // 0x45 + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString('E'); @@ -513,8 +523,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x7F)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x7F)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x7F)); @@ -525,8 +535,8 @@ describe('byte.test.js', function () { // UTF-8 var bytes = ByteBuffer.allocate(1); bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x80)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x80)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x80)); @@ -534,8 +544,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString('Θ…'); // 0x0205: 517 - assert(bytes.toString() === ''); + bytes.putUTF8RawString('Θ…'); // 0x0205: 517 + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString('Θ…'); @@ -543,8 +553,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x81)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x81)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x81)); @@ -552,8 +562,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x7FE)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x7FE)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x7FE)); @@ -561,8 +571,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x7FF)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x7FF)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x7FF)); @@ -573,8 +583,8 @@ describe('byte.test.js', function () { // UTF-8 var bytes = ByteBuffer.allocate(1); bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x800)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x800)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x800)); @@ -582,8 +592,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0x801)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0x801)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0x801)); @@ -591,8 +601,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString('𐐀'); // 0xD801 0xDC00 - assert(bytes.toString() === ''); + bytes.putUTF8RawString('𐐀'); // 0xD801 0xDC00 + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString('𐐀'); @@ -600,8 +610,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString('\ud801\udc01'); // 0xD801 0xDC01 - assert(bytes.toString() === ''); + bytes.putUTF8RawString('\ud801\udc01'); // 0xD801 0xDC01 + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString('\ud801\udc01'); @@ -609,8 +619,8 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0xFFFE)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0xFFFE)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0xFFFE)); @@ -618,14 +628,27 @@ describe('byte.test.js', function () { // UTF-8 bytes = ByteBuffer.allocate(1); - bytes.putString(String.fromCharCode(0xFFFF)); - assert(bytes.toString() === ''); + bytes.putUTF8RawString(String.fromCharCode(0xFFFF)); + assert(bytes.toString() === ''); // CESU-8 bytes = ByteBuffer.allocate(1); bytes.putRawString(String.fromCharCode(0xFFFF)); assert(bytes.toString() === ''); }); + it('U+10000 ~ U+10FFFF', function() { + // https://en.wikipedia.org/wiki/UTF-8 + // UTF-8 + var bytes = ByteBuffer.allocate(1); + bytes = ByteBuffer.allocate(1); + bytes.putUTF8RawString('𐍈'); + assert(bytes.toString() === ''); + // CESU-8 + bytes = ByteBuffer.allocate(1); + bytes.putRawString('𐍈'); + assert(bytes.toString() === ''); + }); + it('should put emoji', function () { // utf8 var bytes = ByteBuffer.allocate(1); @@ -640,6 +663,14 @@ describe('byte.test.js', function () { assert(bytes.toString() === ''); assert.deepEqual(bytes.getRawString(0, 11), str); + var str = '\ud83c\udf3c'; + bytes = ByteBuffer.allocate(1); + bytes.putRawString(str); + assert(bytes.toString() === ''); + bytes = ByteBuffer.allocate(1); + bytes.putUTF8RawString(str); + assert(bytes.toString() === ''); + var bytes = ByteBuffer.allocate(1); // java encode bytes: [-19, -96, -67, -19, -72, -128, 87, 119, 119, -23, -126, -93] var str = '\ud83d\ude00Wwwι‚£';