Skip to content

Commit

Permalink
Merge branch 'feature/blueprint' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
guybedo committed Oct 29, 2018
2 parents a5a9812 + f20eb8f commit ace30e4
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 50 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,4 @@ typings/
.idea
.project
.settings
*.pdf
6 changes: 4 additions & 2 deletions cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@ configure();

/*
	Attach the shared command-line options to a command
	----------------------------------------------------
	Chains `.option()` calls onto `cmd` and returns the result of the chain,
	so the caller can keep configuring the same command.

	NOTE(review): this listing interleaves removed and added diff lines —
	`--no-amp` and `--toc` each appear twice, and the first `--toc` line ends
	the chain with `;`. Only one line of each pair belongs to the final
	function; confirm against the repository before copying.
*/
function with_common_options(cmd) {
return cmd
.option('-b, --blueprint [blueprint]', 'Path to the blueprint')
.option('-o, --output [output]', 'Path for the generated bundle')
.option('--template [template]', 'Path to custom HTML template')
.option('--style [stylesheet]', 'Path to custom CSS')
.option('--css [style]', 'Additional CSS style')
.option('--individual', 'Export each web page as an individual file')
.option('--no-amp', "Don't prefer the AMP version of the web page")
.option('--toc', 'Generate a Table of Contents');
.option('--cover', 'Generate Cover')
.option('--toc', 'Generate TOC')
.option('--no-amp', "Don't prefer the AMP version of the web page");
}

program.version(pkg.version);
Expand Down
88 changes: 53 additions & 35 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const Readability = require('./vendor/readability');
const pkg = require('./package.json');

const spinner = ora();

const blueprints = require('./src/blueprints');
const {
ampToHtml,
fixLazyLoadedImages,
Expand Down Expand Up @@ -65,23 +65,26 @@ function configure() {
Fetch a web page and clean the HTML
-----------------------------------
*/
async function cleanup(url, options) {
async function cleanup(item, blueprint) {
try {
spinner.start(`Fetching: ${url}`);
const content = (await got(url, {
spinner.start(`Fetching: ${item.url}`);
const content = (await got(item.url, {
headers: {
'user-agent': `percollate/${pkg.version}`
}
})).body;
spinner.succeed();

spinner.start('Enhancing web page');
const dom = createDom({ url, content });
const dom = createDom({ url: item.url, content });

const amp = dom.window.document.querySelector('link[rel=amphtml]');
if (amp && options.amp) {
if (amp && blueprint.options.amp) {
spinner.succeed('Found AMP version');
return cleanup(amp.href, options);
return cleanup(
Object.assign({}, item, { url: amp.href }),
blueprint
);
}

/*
Expand All @@ -104,11 +107,12 @@ async function cleanup(url, options) {
}).parse();

spinner.succeed();

const _id = Math.random()
.toString(36)
.replace(/[^a-z]+/g, '')
.substr(2, 10);
return { ...parsed, _id, url };
return Object.assign({}, parsed, item, { _id: _id });
} catch (error) {
spinner.fail(error.message);
throw error;
Expand All @@ -119,24 +123,25 @@ async function cleanup(url, options) {
Bundle the HTML files into a PDF
--------------------------------
*/
async function bundle(items, options) {
async function bundle(blueprint) {
spinner.start('Generating temporary HTML file');
const temp_file = tmp.tmpNameSync({ postfix: '.html' });

const stylesheet = resolve(options.style || './templates/default.css');
const style = fs.readFileSync(stylesheet, 'utf8') + (options.css || '');
const generateToc = options.toc;
const stylesheet = resolve(blueprint.document.css);
let style = fs.readFileSync(stylesheet, 'utf8');
if (blueprint.cover.generate) {
style += fs.readFileSync(resolve(blueprint.cover.css), 'utf8');
}
if (blueprint.toc.generate) {
style += fs.readFileSync(resolve(blueprint.toc.css), 'utf8');
}

const html = nunjucks.renderString(
fs.readFileSync(
resolve(options.template || './templates/default.html'),
'utf8'
),
fs.readFileSync(resolve(blueprint.document.template), 'utf8'),
{
items,
items: blueprint.document.items,
style,
stylesheet, // deprecated
generateToc
blueprint
}
);

Expand Down Expand Up @@ -180,7 +185,11 @@ async function bundle(items, options) {

fs.writeFileSync(temp_file, html);

spinner.succeed(`Temporary HTML file: file://${temp_file}`);
spinner.succeed(
`Processed ${
blueprint.document.items.length
} items, temporary HTML file: file://${temp_file}`
);

spinner.start('Saving PDF');

Expand All @@ -190,7 +199,7 @@ async function bundle(items, options) {
Allow running with no sandbox
See: https://github.com/danburzo/percollate/issues/26
*/
args: options.sandbox
args: blueprint.options.sandbox
? undefined
: ['--no-sandbox', '--disable-setuid-sandbox'],
defaultViewport: {
Expand All @@ -213,9 +222,11 @@ async function bundle(items, options) {
in case we're bundling many web pages.
*/
const output_path =
options.output ||
(items.length === 1
? `${slugify(items[0].title || 'Untitled page')}.pdf`
blueprint.options.output ||
(blueprint.document.items.length === 1
? `${slugify(
blueprint.document.items[0].title || 'Untitled page'
)}.pdf`
: `percollate-${Date.now()}.pdf`);

await page.pdf({
Expand All @@ -236,18 +247,25 @@ async function bundle(items, options) {
Generate PDF
*/
/**
 * Build a blueprint from the URLs and command-line options, clean up every
 * item, then bundle either one document per item or a single document
 * containing all of them.
 *
 * @param {string[]} urls - Web page addresses to process.
 * @param {object} options - Parsed command-line options.
 * @returns {Promise<void>} Settles once bundling has finished; resolves
 *   immediately when the blueprint contains no items.
 */
async function pdf(urls, options) {
	const blueprint = blueprints.fromCommandLineOptions(urls, options);
	const pending = blueprint.document.items;
	if (!pending || !pending.length) {
		return;
	}

	blueprint.document.items = await Promise.all(
		pending.map(item => cleanup(item, blueprint))
	);

	if (blueprint.options.individual) {
		await Promise.all(
			blueprint.document.items.map(item => {
				/*
					Object.assign() copies a single level only, so each item
					needs its own `document` object. Writing to the shared
					one would overwrite `items` for every concurrent bundle.
				*/
				const itemBlueprint = Object.assign({}, blueprint, {
					document: Object.assign({}, blueprint.document, {
						items: [item]
					})
				});
				// Bundle the per-item blueprint, not the shared one.
				return bundle(itemBlueprint);
			})
		);
	} else {
		await bundle(blueprint);
	}
}

Expand Down
84 changes: 84 additions & 0 deletions src/blueprints.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
const fs = require('fs');

/**
 * Assemble a fresh blueprint from the default cover, table of contents,
 * document and options sections.
 *
 * @returns {{cover: object, toc: object, document: object, options: object}}
 *   A new blueprint object; every call builds new section objects.
 */
function defaultBlueprint() {
	const cover = _defaultCover();
	const toc = _defaultToc();
	const document = _defaultDocument();
	const options = _defaultOptions();
	return { cover, toc, document, options };
}

/**
 * Create the blueprint for a run, either from a blueprint file or from the
 * individual command-line options.
 *
 * When `options.blueprint` is set, the file is parsed as JSON and layered
 * over the defaults section by section, so a blueprint that omits `cover`,
 * `toc`, `document` or `options` (or single keys inside them) still yields a
 * complete object. Otherwise the defaults are adjusted with the command-line
 * options. URLs passed explicitly always replace `document.items`.
 *
 * @param {string[]} [urls] - Web page addresses; ignored when empty.
 * @param {object} options - Parsed command-line options.
 * @returns {object} Blueprint with `cover`, `toc`, `document` and `options`.
 * @throws {Error} If the blueprint file cannot be read or is not valid JSON.
 */
function fromCommandLineOptions(urls, options) {
	let blueprint = null;
	if (options.blueprint) {
		const defaults = defaultBlueprint();
		// `|| {}` covers a file whose whole content is `null`.
		const loaded =
			JSON.parse(fs.readFileSync(options.blueprint, 'utf8')) || {};
		// Keep any extra top-level keys from the file, then fill the gaps in
		// each known section with its default values.
		blueprint = Object.assign({}, defaults, loaded);
		Object.keys(defaults).forEach(function(section) {
			blueprint[section] = Object.assign(
				{},
				defaults[section],
				loaded[section]
			);
		});
	} else {
		blueprint = _parseCommandLineOptions(options, defaultBlueprint());
	}
	if (urls && urls.length > 0) {
		blueprint.document.items = urls.map(function(url) {
			return { url: url };
		});
	}
	return blueprint;
}

/**
 * Copy the command-line options onto a blueprint, mutating and returning it.
 *
 * `cover`, `toc`, `template` and `style` only override the blueprint when
 * they are truthy. `sandbox`, `output` and `individual` are always copied,
 * even when undefined, which lets an unset `output` fall through to the
 * caller's own fallback.
 *
 * @param {object} options - Parsed command-line options.
 * @param {object} blueprint - Blueprint to update; must already contain the
 *   `cover`, `toc`, `document` and `options` sections.
 * @returns {object} The same `blueprint` instance.
 */
function _parseCommandLineOptions(options, blueprint) {
	if (options.cover) {
		blueprint.cover.generate = options.cover;
	}
	if (options.toc) {
		blueprint.toc.generate = options.toc;
	}
	if (options.template) {
		blueprint.document.template = options.template;
	}
	if (options.style) {
		blueprint.document.css = options.style;
	}
	blueprint.options.sandbox = options.sandbox;
	blueprint.options.output = options.output;
	blueprint.options.individual = options.individual;
	/*
		The flag is read back as `blueprint.options.amp`, so it must be stored
		under that key rather than 'no-amp'. Leave the default in place when
		the option was not supplied at all.
	*/
	if (options.amp !== undefined) {
		blueprint.options.amp = options.amp;
	}
	return blueprint;
}

/**
 * Default run options: output file name, one-file-per-page switch and the
 * AMP preference.
 *
 * @returns {{output: string, individual: boolean, amp: boolean}} A new
 *   object on every call.
 */
function _defaultOptions() {
	const defaults = {};
	defaults.output = 'percollate.pdf';
	defaults.individual = false;
	defaults.amp = true;
	return defaults;
}

/**
 * Default cover section: generation switched off, pointing at the bundled
 * cover template and stylesheet.
 *
 * @returns {{generate: boolean, template: string, css: string, assets: object}}
 *   A new object on every call.
 */
function _defaultCover() {
	const cover = { generate: false };
	cover.template = './templates/default_cover.html';
	cover.css = './templates/default_cover.css';
	cover.assets = {};
	return cover;
}

/**
 * Default table-of-contents section: generation switched off, pointing at
 * the bundled TOC template and stylesheet.
 *
 * @returns {{generate: boolean, template: string, css: string, assets: object}}
 *   A new object on every call.
 */
function _defaultToc() {
	const templateDir = './templates';
	return Object.assign(
		{ generate: false },
		{ template: templateDir + '/default_toc.html' },
		{ css: templateDir + '/default_toc.css' },
		{ assets: {} }
	);
}

/**
 * Default document section: the bundled HTML template and stylesheet, no
 * assets and an empty item list.
 *
 * @returns {{template: string, css: string, assets: object, items: Array}}
 *   A new object on every call.
 */
function _defaultDocument() {
	const template = './templates/default.html';
	const css = './templates/default.css';
	const assets = {};
	const items = [];
	return { template, css, assets, items };
}

// Public surface of this module: only the command-line entry point is
// exported; the default-section builders above are not.
module.exports = {
fromCommandLineOptions
};
4 changes: 0 additions & 4 deletions templates/default.css
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ pre code {
display: none;
}

nav.toc {
page-break-after: always;
}

/*
Article formatting
----------------------------------------------------
Expand Down
11 changes: 2 additions & 9 deletions templates/default.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,8 @@
</head>
<body>

{% if generateToc %}
<nav class="toc">
<h1>Items</h1>
<ol>
{% for item in items %}
<li class="toc__line"><a href="#{{ item._id }}">{{ item.title }}</a></li>
{% endfor %}
</ol>
</nav>
{% if blueprint.toc.generate %}
{% include blueprint.toc.template %}
{% endif %}

{% for item in items %}
Expand Down
Empty file added templates/default_cover.css
Empty file.
Empty file added templates/default_cover.html
Empty file.
3 changes: 3 additions & 0 deletions templates/default_toc.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nav.toc {
page-break-after: always;
}
8 changes: 8 additions & 0 deletions templates/default_toc.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<nav class="toc">
<h1>Items</h1>
<ol>
{% for item in items %}
<li class="toc__line"><a href="#{{ item._id }}">{{ item.title }}</a></li>
{% endfor %}
</ol>
</nav>

0 comments on commit ace30e4

Please sign in to comment.