From f4602c96c244d3582ad1b5c363bcddf58f6b106f Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:23:30 -0600 Subject: [PATCH 01/14] install download instead of unzip package --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 9ea8aa1b..148e0f4f 100644 --- a/package.json +++ b/package.json @@ -35,11 +35,11 @@ }, "devDependencies": { "chai": "^2.0.0", + "download": "^8.0.0", "mocha": "^2.3.4", "random-seed": "^0.2.0", "standard": "^4.5.4", - "temp": "^0.8.3", - "unzip": "^0.1.11" + "temp": "^0.8.3" }, "standard": { "global": [ From 8d8f32354284cc3ec5bae47a4d39c2365e48bf6b Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:24:04 -0600 Subject: [PATCH 02/14] fix getText by using download package --- .gitignore | 2 ++ benchmark/large-text-buffer.benchmark.js | 45 +++++++++--------------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 51f142db..53fdf6b5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ build /browser.js emsdk-portable package-lock.json + +benchmark/*.csv diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index 95106bb3..6cac11d0 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -1,33 +1,22 @@ -const http = require('http') -const fs = require('fs') -const unzip = require('unzip') const { TextBuffer } = require('..') -const unzipper = unzip.Parse() - -const getText = () => { - return new Promise(resolve => { - console.log('fetching text file...') - const req = http.get({ - hostname: 'www.acleddata.com', - port: 80, - // 51 MB text file - path: '/wp-content/uploads/2017/01/ACLED-Version-7-All-Africa-1997-2016_csv_dyadic-file.zip', - agent: false - }, res => { - res - .pipe(unzipper) - .on('entry', entry => { - let data = ''; - entry.on('data', chunk => data += chunk); - entry.on('end', () => { - 
resolve(data) - }); - }) - }) - - req.end() - }) +const fs = require('fs') +const {promisify} = require('util') +const readFile = promisify(fs.readFile) +const path = require('path') +const download = require('download') + +async function getText() { + const filePath = path.join(__dirname, '1000000 Sales Records.csv') + if (!fs.existsSync(filePath)) { + // 122MB file + await download( + 'http://eforexcel.com/wp/wp-content/uploads/2017/07/1000000%20Sales%20Records.zip', + __dirname, + {extract: true} + ) + } + return await readFile(filePath) } const timer = size => `Time to find "cat" in ${size} file` From 592a10dc4ec8b9ce63361cc1e7b97d61a9c0d398 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:24:14 -0600 Subject: [PATCH 03/14] run the benchmark for the full size --- benchmark/large-text-buffer.benchmark.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index 6cac11d0..37378617 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -26,7 +26,7 @@ getText().then(txt => { console.log('running findWordsWithSubsequence tests...') - const sizes = [['10b', 10], ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000]] + const sizes = [['10b', 10], ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] const test = size => { const _timer = timer(size[0]) From 38a70893e85675832c4cb20f8bcdd18c1bf032e4 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:25:32 -0600 Subject: [PATCH 04/14] add large-text benchmark to npm benchmark script --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 148e0f4f..e53f5415 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "test:node": "mocha test/js/*.js", "test:browser": "SUPERSTRING_USE_BROWSER_VERSION=1 mocha test/js/*.js", 
"test": "npm run test:node && npm run test:browser", - "benchmark": "node benchmark/marker-index.benchmark.js", + "benchmark": "node benchmark/marker-index.benchmark.js && node benchmark/large-text-buffer.benchmark.js", "prepublishOnly": "git submodule update --init --recursive && npm run build:browser", "standard": "standard --recursive src test" }, From f4eb0a5f59aaa0528a06fe244b2dd46bdf3122a5 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:45:13 -0600 Subject: [PATCH 05/14] use performance from perf_hooks to measure time --- benchmark/large-text-buffer.benchmark.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index 37378617..0e9ad125 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -5,6 +5,7 @@ const {promisify} = require('util') const readFile = promisify(fs.readFile) const path = require('path') const download = require('download') +const {performance} = require("perf_hooks") async function getText() { const filePath = path.join(__dirname, '1000000 Sales Records.csv') @@ -19,8 +20,6 @@ async function getText() { return await readFile(filePath) } -const timer = size => `Time to find "cat" in ${size} file` - getText().then(txt => { const buffer = new TextBuffer() @@ -28,17 +27,18 @@ getText().then(txt => { const sizes = [['10b', 10], ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] - const test = size => { - const _timer = timer(size[0]) + const word = "Morocco" + const test = (word, size) => { buffer.setText(txt.slice(0, size[1])) - console.time(_timer) - return buffer.findWordsWithSubsequence('cat', '', 100).then(sugs => { - console.timeEnd(_timer) + const ti = performance.now() + return buffer.findWordsWithSubsequence(word, '', 100).then(sugs => { + const tf = performance.now() + console.log(`Time to find "${word}" in 
${size[0]} file: ${(tf-ti).toFixed(5)} ms`) }) } return sizes.reduce((promise, size) => { - return promise.then(() => test(size)) + return promise.then(() => test(word, size)) }, Promise.resolve()) }).then(() => { console.log('finished') From 6cbc91244ba29f12f77e9a874a713bd84c8590be Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:45:39 -0600 Subject: [PATCH 06/14] remove 10b test (Very small to find anything meaningful) --- benchmark/large-text-buffer.benchmark.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index 0e9ad125..f833aa4b 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -25,7 +25,7 @@ getText().then(txt => { console.log('running findWordsWithSubsequence tests...') - const sizes = [['10b', 10], ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] + const sizes = [ ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] const word = "Morocco" const test = (word, size) => { From 79d838ce1bc98ed4e4e5d215cab54376b6e82af0 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:48:26 -0600 Subject: [PATCH 07/14] run the tests for different words --- benchmark/large-text-buffer.benchmark.js | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index f833aa4b..eb0f1ad2 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -23,11 +23,10 @@ async function getText() { getText().then(txt => { const buffer = new TextBuffer() - console.log('running findWordsWithSubsequence tests...') + console.log('\n running findWordsWithSubsequence tests... 
\n') const sizes = [ ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] - const word = "Morocco" const test = (word, size) => { buffer.setText(txt.slice(0, size[1])) const ti = performance.now() @@ -37,9 +36,11 @@ getText().then(txt => { }) } - return sizes.reduce((promise, size) => { - return promise.then(() => test(word, size)) - }, Promise.resolve()) + for (const word of ["Morocco", "Austria", "France", "Liechtenstein", "Republic of the Congo", "Antigua and Barbuda", "Japan"]) { + sizes.reduce((promise, size) => { + return promise.then(() => test(word, size)) + }, Promise.resolve()) + } }).then(() => { - console.log('finished') + console.log('findWordsWithSubsequence tests finished \n') }) From 93348883b97482710c170584a8ddc79e6c23359a Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 08:57:59 -0600 Subject: [PATCH 08/14] pretty print the result --- benchmark/large-text-buffer.benchmark.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index eb0f1ad2..81babe25 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -32,7 +32,7 @@ getText().then(txt => { const ti = performance.now() return buffer.findWordsWithSubsequence(word, '', 100).then(sugs => { const tf = performance.now() - console.log(`Time to find "${word}" in ${size[0]} file: ${(tf-ti).toFixed(5)} ms`) + console.log(`In ${size[0]} file, time to find "${word}" was: ${' '.repeat(50-word.length-size[0].length)} ${(tf-ti).toFixed(5)} ms`) }) } From 3ab437dbab7fa2c1aeee089291d4ce03fba31ab1 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 09:02:58 -0600 Subject: [PATCH 09/14] make test runner async + run same size tests together --- benchmark/large-text-buffer.benchmark.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index 81babe25..c69d6139 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -20,7 +20,7 @@ async function getText() { return await readFile(filePath) } -getText().then(txt => { +getText().then(async (txt) => { const buffer = new TextBuffer() console.log('\n running findWordsWithSubsequence tests... \n') @@ -35,11 +35,11 @@ getText().then(txt => { console.log(`In ${size[0]} file, time to find "${word}" was: ${' '.repeat(50-word.length-size[0].length)} ${(tf-ti).toFixed(5)} ms`) }) } - - for (const word of ["Morocco", "Austria", "France", "Liechtenstein", "Republic of the Congo", "Antigua and Barbuda", "Japan"]) { - sizes.reduce((promise, size) => { - return promise.then(() => test(word, size)) - }, Promise.resolve()) + for (const size of sizes) { + for (const word of ["Morocco", "Austria", "France", "Liechtenstein", "Republic of the Congo", "Antigua and Barbuda", "Japan"]) { + await test(word, size) + } + console.log('\n') } }).then(() => { console.log('findWordsWithSubsequence tests finished \n') From 154867658f2195565a2bff58ba6cfcfac3ad5dbf Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 09:05:00 -0600 Subject: [PATCH 10/14] make test function async --- benchmark/large-text-buffer.benchmark.js | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index c69d6139..5a0f0cee 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -27,13 +27,12 @@ getText().then(async (txt) => { const sizes = [ ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] - const test = (word, size) => { + const test = async (word, size) => { buffer.setText(txt.slice(0, size[1])) const ti = performance.now() - return 
buffer.findWordsWithSubsequence(word, '', 100).then(sugs => { - const tf = performance.now() - console.log(`In ${size[0]} file, time to find "${word}" was: ${' '.repeat(50-word.length-size[0].length)} ${(tf-ti).toFixed(5)} ms`) - }) + await buffer.findWordsWithSubsequence(word, '', 100) + const tf = performance.now() + console.log(`In ${size[0]} file, time to find "${word}" was: ${' '.repeat(50-word.length-size[0].length)} ${(tf-ti).toFixed(5)} ms`) } for (const size of sizes) { for (const word of ["Morocco", "Austria", "France", "Liechtenstein", "Republic of the Congo", "Antigua and Barbuda", "Japan"]) { From 928fa1cea66631b4dd08bccb247816c22b5d1fbf Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 09:55:06 -0600 Subject: [PATCH 11/14] benchmark buffer.setText --- benchmark/large-text-buffer.benchmark.js | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/benchmark/large-text-buffer.benchmark.js b/benchmark/large-text-buffer.benchmark.js index 5a0f0cee..1fca4037 100644 --- a/benchmark/large-text-buffer.benchmark.js +++ b/benchmark/large-text-buffer.benchmark.js @@ -23,23 +23,31 @@ async function getText() { getText().then(async (txt) => { const buffer = new TextBuffer() - console.log('\n running findWordsWithSubsequence tests... \n') + console.log('\n running large-text-buffer tests... 
\n') const sizes = [ ['100b', 100], ['1kb', 1000], ['1MB', 1000000], ['51MB', 100000000], ['119MB', txt.length]] const test = async (word, size) => { - buffer.setText(txt.slice(0, size[1])) - const ti = performance.now() + const ti2 = performance.now() await buffer.findWordsWithSubsequence(word, '', 100) - const tf = performance.now() - console.log(`In ${size[0]} file, time to find "${word}" was: ${' '.repeat(50-word.length-size[0].length)} ${(tf-ti).toFixed(5)} ms`) + const tf2 = performance.now() + console.log(`For ${size[0]} file, time to find "${word}" was: ${' '.repeat(50-word.length-size[0].length)} ${(tf2-ti2).toFixed(5)} ms`) } for (const size of sizes) { + + const bufferText = txt.slice(0, size[1]) + + // benchmark buffer.setText + const ti1 = performance.now() + buffer.setText(bufferText) + const tf1 = performance.now() + console.log(`For ${size[0]} file, buffer.setText took ${' '.repeat(51-size[0].length)} ${(tf1-ti1).toFixed(5)} ms`) + for (const word of ["Morocco", "Austria", "France", "Liechtenstein", "Republic of the Congo", "Antigua and Barbuda", "Japan"]) { await test(word, size) } console.log('\n') } }).then(() => { - console.log('findWordsWithSubsequence tests finished \n') + console.log(' large-text-buffer finished \n') }) From cdd5de27adff19f27efc0c994c77df951abaaa84 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 16:54:32 -0600 Subject: [PATCH 12/14] fix text-buffer benchmark --- benchmark/text-buffer.benchmark.js | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/text-buffer.benchmark.js b/benchmark/text-buffer.benchmark.js index 36152b1f..10e98605 100644 --- a/benchmark/text-buffer.benchmark.js +++ b/benchmark/text-buffer.benchmark.js @@ -10,7 +10,7 @@ function benchmarkSearch(description, pattern, expectedPosition) { let name = `Search for ${description} - TextBuffer` console.time(name) for (let i = 0; i < trialCount; i++) { - assert.deepEqual(buffer.searchSync(pattern), 
expectedPosition) + assert.deepEqual(buffer.findSync(pattern), expectedPosition) } console.timeEnd(name) diff --git a/package.json b/package.json index e53f5415..c5ad7dcb 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "test:node": "mocha test/js/*.js", "test:browser": "SUPERSTRING_USE_BROWSER_VERSION=1 mocha test/js/*.js", "test": "npm run test:node && npm run test:browser", - "benchmark": "node benchmark/marker-index.benchmark.js && node benchmark/large-text-buffer.benchmark.js", + "benchmark": "node benchmark/text-buffer.benchmark.js && node benchmark/marker-index.benchmark.js && node benchmark/large-text-buffer.benchmark.js", "prepublishOnly": "git submodule update --init --recursive && npm run build:browser", "standard": "standard --recursive src test" }, From f5792c73f62a7540918d4d23ce281a2fd0ee03f2 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 17:03:46 -0600 Subject: [PATCH 13/14] use performance in text-buffer benchmarks + pretty print --- benchmark/text-buffer.benchmark.js | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/benchmark/text-buffer.benchmark.js b/benchmark/text-buffer.benchmark.js index 10e98605..21b63147 100644 --- a/benchmark/text-buffer.benchmark.js +++ b/benchmark/text-buffer.benchmark.js @@ -1,4 +1,8 @@ +console.log(' running text-buffer tests... 
\n') + const assert = require('assert') +const {performance} = require("perf_hooks") + const {TextBuffer} = require('..') const text = 'abc def ghi jkl\n'.repeat(1024 * 1024) @@ -8,14 +12,15 @@ const trialCount = 10 function benchmarkSearch(description, pattern, expectedPosition) { let name = `Search for ${description} - TextBuffer` - console.time(name) + const ti1 = performance.now() for (let i = 0; i < trialCount; i++) { assert.deepEqual(buffer.findSync(pattern), expectedPosition) } - console.timeEnd(name) + const tf1 = performance.now() + console.log(`${name} ${' '.repeat(80-name.length)} ${(tf1-ti1).toFixed(3)} ms`) name = `Search for ${description} - lines array` - console.time(name) + const ti2 = performance.now() const regex = new RegExp(pattern) for (let i = 0; i < trialCount; i++) { for (let row = 0, rowCount = lines.length; row < rowCount; row++) { @@ -32,11 +37,14 @@ function benchmarkSearch(description, pattern, expectedPosition) { } } } - console.timeEnd(name) - console.log() + const tf2 = performance.now() + console.log(`${name} ${' '.repeat(80-name.length)} ${(tf2-ti2).toFixed(3)} ms`) } benchmarkSearch('simple non-existent pattern', '\t', null) benchmarkSearch('complex non-existent pattern', '123|456|789', null) benchmarkSearch('simple existing pattern', 'jkl', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) -benchmarkSearch('complex existing pattern', 'j\\w+', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) \ No newline at end of file +benchmarkSearch('complex existing pattern', 'j\\w+', {start: {row: 0, column: 12}, end: {row: 0, column: 15}}) + + +console.log('\n text-buffer finished \n') \ No newline at end of file From 0e9523a0bb1e146d13fd45ef3cd46c24ec83a000 Mon Sep 17 00:00:00 2001 From: Amin Yahyaabadi Date: Tue, 8 Dec 2020 17:09:02 -0600 Subject: [PATCH 14/14] use performance in marker-index benchmarks + pretty print --- benchmark/marker-index.benchmark.js | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) 
diff --git a/benchmark/marker-index.benchmark.js b/benchmark/marker-index.benchmark.js index c6e2ac20..5a1058c3 100644 --- a/benchmark/marker-index.benchmark.js +++ b/benchmark/marker-index.benchmark.js @@ -1,6 +1,10 @@ 'use strict'; +console.log(' running marker-index tests... \n') + const Random = require('random-seed') +const {performance} = require("perf_hooks") + const {MarkerIndex} = require('..') const {traverse, traversalDistance, compare} = require('../test/js/helpers/point-helpers') @@ -41,12 +45,13 @@ function runBenchmark () { } function profileOperations (name, operations) { - console.time(name) + const ti1 = performance.now() for (let i = 0, n = operations.length; i < n; i++) { const operation = operations[i] markerIndex[operation[0]].apply(markerIndex, operation[1]) } - console.timeEnd(name) + const tf1 = performance.now() + console.log(`${name} ${' '.repeat(80-name.length)} ${(tf1-ti1).toFixed(3)} ms`) } function enqueueSequentialInsert () { @@ -118,3 +123,5 @@ function getSplice () { } runBenchmark() + +console.log(' \n marker-index finished \n')