Skip to content

Commit

Permalink
Add --wait option to process URLs sequentially. (#134)
Browse files Browse the repository at this point in the history
Adds the -w, --wait=<sec> global CLI option to pause between processing URLs for a number of seconds. If unspecified, URLs are processed in parallel as before. Fixes #133.
  • Loading branch information
danburzo authored Jan 24, 2022
1 parent e2544c1 commit 0d497e3
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 6 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ Use the `--url` option to supply the source's original URL.
curl https://example.com | percollate pdf - --url=https://example.com
```

#### `-w, --wait`

By default, percollate processes URLs in parallel. Use the `--wait` option to process them sequentially instead, with a pause between items. The delay is specified in seconds, and can be zero.

```bash
percollate epub --wait=1 url1 url2 url3
```

#### `--individual`

By default, percollate bundles all web pages in a single file. Use the `--individual` flag to export each source to a separate file.
Expand Down
3 changes: 3 additions & 0 deletions cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ Common options:
-u, --url=<url> Sets the base URL when HTML is provided on stdin.
Multiple URL options can be specified.
-w, --wait=<sec> Process the provided URLs sequentially,
pausing a number of seconds between items.
-t <title>, The bundle title.
--title=<title>
Expand Down
31 changes: 25 additions & 6 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import mimetype from './src/util/mimetype.js';
import epubDate from './src/util/epub-date.js';
import humanDate from './src/util/human-date.js';
import outputPath from './src/util/output-path.js';
import { resolveSequence, resolveParallel } from './src/util/promises.js';
import addExif from './src/exif.js';
import { hyphenateDom } from './src/hyphenate.js';
import { textToIso6391, getLanguageAttribute } from './src/util/language.js';
Expand Down Expand Up @@ -548,19 +549,37 @@ async function generate(fn, urls, options = {}) {
if (!configured) {
configure();
}
if (!urls.length) return null;
if (!urls.length) {
return null;
}
let w = options.wait * 1000;
if (options.debug && w) {
if (Number.isFinite(w) && w >= 0) {
out.write(
`Processing URLs sequentially, waiting ${options.wait} seconds in-between.\n`
);
} else {
out.write(
`Invalid --wait: expecting positive number, got ${options.wait}. Processing URLs in parallel.\n`
);
}
}
let resolve =
Number.isFinite(w) && w >= 0 ? resolveSequence : resolveParallel;
let items = (
await Promise.all(
urls.map((url, i) =>
cleanup(url, {
await resolve(
urls,
(url, i) => {
return cleanup(url, {
...options,
preferred_url: options.url ? options.url[i] : undefined
}).catch(err => {
console.error(err);
console.log('Ignoring item');
return null;
})
)
});
},
w
)
).filter(it => it);

Expand Down
2 changes: 2 additions & 0 deletions src/cli-opts.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ let opts_with_optarg = new Set([
'style',
'css',
'url',
'wait',
'title',
'author'
]);
let opts_with_arr = new Set(['url']);
let aliases = {
o: 'output',
u: 'url',
w: 'wait',
t: 'title',
a: 'author',
h: 'help',
Expand Down
32 changes: 32 additions & 0 deletions src/util/promises.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
Promise aggregation functions
-----------------------------
*/

/*
	Run the `fn` function sequentially on each item
	in the `arr` array, with an optional `delay`
	in milliseconds between items.

	`fn` is called as `fn(item, index, arr)` and may return
	either a promise or a plain value; the next item is only
	started once the current result has settled.

	The pause is inserted before every item except the first,
	and only when `delay` is greater than zero.

	Returns a promise that resolves to the array of results,
	in the same order as `arr`. If `fn` throws or rejects,
	the returned promise rejects with that error and the
	remaining items are not processed.
*/
export async function resolveSequence(arr, fn, delay = 0) {
	const results = [];
	for (let i = 0; i < arr.length; i++) {
		if (delay > 0 && i > 0) {
			await new Promise(r => setTimeout(r, delay));
		}
		// `await` accepts promises and plain values alike, so a
		// synchronous `fn` works here just as it does with Promise.all().
		results.push(await fn(arr[i], i, arr));
	}
	return results;
}

/*
	Call `fn` on every item of the `items` array up front,
	without waiting for earlier calls to finish, and
	aggregate the outcomes with Promise.all().

	`fn` is called as `fn(item, index, items)`. The returned
	promise resolves to the results in the order of `items`.
*/
export function resolveParallel(items, fn) {
	const pending = items.map((item, index, all) => fn(item, index, all));
	return Promise.all(pending);
}
31 changes: 31 additions & 0 deletions test/promises.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { resolveSequence, resolveParallel } from '../src/util/promises.js';
import tape from 'tape';
import { performance } from 'perf_hooks';

let arr = [1, 2, 3];
let epsilon = 100; // milliseconds +/- error

/*
	resolveSequence() should hand back the results in input order
	and pause `delay` milliseconds between consecutive items,
	i.e. (arr.length - 1) pauses overall. The measured wall-clock
	time must land within `epsilon` ms of that total.
*/
tape('resolveSequence', async t => {
	const delay = 500;
	const expected_duration = (arr.length - 1) * delay;
	const started_at = performance.now();
	const res = await resolveSequence(arr, i => Promise.resolve(i), delay);
	t.deepEqual(res, arr, 'correct result is returned');
	// How far the elapsed time strays from the expected total pause.
	const drift = performance.now() - started_at - expected_duration;
	t.ok(Math.abs(drift) < epsilon, 'delay is applied');
	t.end();
});

/*
	resolveParallel() should hand back the results in input order
	without adding any pause: the measured wall-clock time must
	stay within `epsilon` ms of zero.
*/
tape('resolveParallel', async t => {
	const expected_duration = 0;
	const started_at = performance.now();
	const res = await resolveParallel(arr, i => Promise.resolve(i));
	t.deepEqual(res, arr, 'correct result is returned');
	// How far the elapsed time strays from the expected (zero) duration.
	const drift = performance.now() - started_at - expected_duration;
	t.ok(Math.abs(drift) < epsilon, "there's no delay");
	t.end();
});

0 comments on commit 0d497e3

Please sign in to comment.