Skip to content

Commit

Permalink
Use a generic 'image' MIME type for inline images whose URL does not …
Browse files Browse the repository at this point in the history
…end in an image file extension
  • Loading branch information
danburzo committed Aug 11, 2024
1 parent 035a221 commit cb0a57e
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 44 deletions.
8 changes: 4 additions & 4 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import {
} from './src/constants/markdown.js';

import slurp from './src/util/slurp.js';
import fileMimetype from './src/util/file-mimetype.js';
import { lookupMimetype } from './src/util/file-mimetype.js';
import epubDate from './src/util/epub-date.js';
import humanDate from './src/util/human-date.js';
import outputPath from './src/util/output-path.js';
Expand Down Expand Up @@ -169,15 +169,15 @@ async function fetchContent(ref, fetchOptions = {}) {
if (!url) {
return {
buffer: await readFile(ref),
contentType: fileMimetype(ref)
contentType: lookupMimetype(ref)
};
}

if (url && url.protocol === 'file:') {
url = decodeURI(url.href.replace(/^file:\/\//, ''));
return {
buffer: await readFile(url),
contentType: fileMimetype(url)
contentType: lookupMimetype(url)
};
}

Expand Down Expand Up @@ -978,7 +978,7 @@ async function epubgen(data, output_path, options) {
remoteResources: remoteResources.map(entry => ({
id: entry.mapped.replace(/[^a-z0-9]/gi, ''),
href: entry.mapped,
mimetype: fileMimetype(entry.mapped)
mimetype: lookupMimetype(entry.mapped)
}))
});

Expand Down
2 changes: 1 addition & 1 deletion src/constants/regex.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
to discern when an URL points to an image, but here we are.
`REGEX_IMAGE_URL` here needs to be kept in sync
with the `image_mimetypes` set defined in `inline-images.js`.
with the `imageMimetypes` set defined in `file-mimetype.js`.
*/
export const REGEX_IMAGE_URL = /\.(jpe?g|png|svg|gif|bmp|webp|avif|tiff?)$/i;
74 changes: 36 additions & 38 deletions src/inline-images.js
Original file line number Diff line number Diff line change
@@ -1,33 +1,15 @@
import { parseSrcset, stringifySrcset } from 'srcset';
import fileMimetype from './util/file-mimetype.js';
import { lookupMimetype, imageMimetypes } from './util/file-mimetype.js';
import fetchBase64 from './util/fetch-base64.js';

/*
Note: it is unfortunate that we use two separate mechanisms
to discern when an URL points to an image, but here we are.
`image_mimetypes` here needs to be kept in sync with the
`REGEX_IMAGE_URL` constant!
*/
const image_mimetypes = new Set([
'image/avif',
'image/bmp',
'image/gif',
'image/jpeg',
'image/png',
'image/svg+xml',
'image/tiff',
'image/webp'
]);

function get_mime(src, doc) {
let pathname = src;
try {
pathname = new URL(src, doc.baseURI).pathname;
} catch (err) {
// no-op, probably due to bad `doc.baseURI`
}
return fileMimetype(pathname);
return lookupMimetype(pathname);
}

export default async function inlineImages(doc, fetchOptions = {}, out) {
Expand All @@ -37,14 +19,22 @@ export default async function inlineImages(doc, fetchOptions = {}, out) {
let src_promises = Array.from(
doc.querySelectorAll('picture source[src], img[src]')
).map(async el => {
const mime = get_mime(el.src, doc);
if (mime && image_mimetypes.has(mime)) {
if (out) {
out.write(el.src + '\n');
}
let data = await fetchBase64(el.src, fetchOptions);
el.setAttribute('src', `data:${mime};base64,${data}`);
let mime = get_mime(el.src, doc);
/*
For web pages using atypical URLs for images
let’s just use a generic MIME type and hope it works.
For an example, see:
https://github.com/danburzo/percollate/issues/174
*/
if (!mime || !imageMimetypes.has(mime)) {
mime = 'image';
}
if (out) {
out.write(el.src + '\n');
}
let data = await fetchBase64(el.src, fetchOptions);
el.setAttribute('src', `data:${mime};base64,${data}`);
});

let srcset_promises = Array.from(
Expand All @@ -71,18 +61,26 @@ export default async function inlineImages(doc, fetchOptions = {}, out) {
stringifySrcset(
await Promise.all(
items.map(async item => {
const mime = get_mime(item.url, doc);
if (mime && image_mimetypes.has(mime)) {
let data = await fetchBase64(
item.url,
fetchOptions
);
return {
...item,
url: `data:${mime};base64,${data}`
};
let mime = get_mime(item.url, doc);

/*
For web pages using atypical URLs for images
let’s just use a generic MIME type and hope it works.
For an example, see:
https://github.com/danburzo/percollate/issues/174
*/
if (!mime || !imageMimetypes.has(mime)) {
mime = 'image';
}
return item;
let data = await fetchBase64(
item.url,
fetchOptions
);
return {
...item,
url: `data:${mime};base64,${data}`
};
})
)
)
Expand Down
24 changes: 23 additions & 1 deletion src/util/file-mimetype.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@ import mimetype from 'mimetype';
mimetype.set('.webp', 'image/webp');
mimetype.set('.avif', 'image/avif');

export default function lookup(filepath) {
export function lookupMimetype(filepath) {
return mimetype.lookup(filepath);
}

/**
 * Reverse lookup: find the catalog key registered for a MIME type.
 *
 * Scans `mimetype.catalog` and returns the key of the first entry whose
 * value is strictly equal to `type`. Keys are presumably file extensions
 * with a leading dot (this file registers `.webp` / `.avif` that way) —
 * confirm against the `mimetype` package.
 *
 * @param {string} type - MIME type to search for, e.g. `image/png`.
 * @returns {string|undefined} The first matching catalog key, or
 *   `undefined` when no entry has that MIME type.
 */
export function extForMimetype(type) {
	// The parameter is deliberately NOT named `mimetype`: that name would
	// shadow the imported `mimetype` module, making `mimetype.catalog`
	// a property read on the string argument (i.e. `undefined`), and
	// `Object.entries(undefined)` throws a TypeError.
	const match = Object.entries(mimetype.catalog).find(
		([, registeredType]) => registeredType === type
	);
	return match?.[0];
}

/*
	MIME types that are accepted as images (membership is tested
	with `imageMimetypes.has(mime)`).

	Note: URLs are also recognized as images through a second,
	independent mechanism: the `REGEX_IMAGE_URL` constant, which
	matches on file extension. The two lists are maintained by hand,
	so any type added or removed here must be mirrored there.
*/
const knownImageMimetypes = [
	'image/avif',
	'image/bmp',
	'image/gif',
	'image/jpeg',
	'image/png',
	'image/svg+xml',
	'image/tiff',
	'image/webp'
];

export const imageMimetypes = new Set(knownImageMimetypes);

0 comments on commit cb0a57e

Please sign in to comment.