Skip to content

Commit ae3dadf

Browse files
authored
cache asset images more aggressively (github#23553)
* cache asset images more aggressively * more careful about which gets the manual surrogate key * fix rendered-content-link-checker script too * feedbacked
1 parent dc1a510 commit ae3dadf

File tree

8 files changed

+150
-16
lines changed

8 files changed

+150
-16
lines changed

lib/render-content/create-processor.js

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import HighlightjsGraphql from 'highlightjs-graphql'
1818
import remarkCodeExtra from 'remark-code-extra'
1919
import codeHeader from './plugins/code-header.js'
2020
import rewriteLocalLinks from './plugins/rewrite-local-links.js'
21+
import rewriteImgSources from './plugins/rewrite-asset-urls.js'
2122
import useEnglishHeadings from './plugins/use-english-headings.js'
2223
import rewriteLegacyAssetPaths from './plugins/rewrite-legacy-asset-paths.js'
2324
import wrapInElement from './plugins/wrap-in-element.js'
@@ -45,6 +46,7 @@ export default function createProcessor(context) {
4546
.use(raw)
4647
.use(rewriteLegacyAssetPaths, context)
4748
.use(wrapInElement, { selector: 'ol > li img', wrapper: 'span.procedural-image-wrapper' })
49+
.use(rewriteImgSources)
4850
.use(rewriteLocalLinks, {
4951
languageCode: context.currentLanguage,
5052
version: context.currentVersion,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import fs from 'fs'
2+
3+
import { visit } from 'unist-util-visit'
4+
5+
// Matches any <img> tags with an href that starts with `/assets/` or '/public/'
6+
const matcher = (node) =>
7+
node.type === 'element' &&
8+
node.tagName === 'img' &&
9+
node.properties &&
10+
node.properties.src &&
11+
(node.properties.src.startsWith('/assets/') || node.properties.src.startsWith('/public/'))
12+
13+
// Content authors write images like `![Alt](/assets/images/foo.png`, but
14+
// for caching purposes we want to rewrite those so they can be cached
15+
// indefinitely.
16+
export default function rewriteImgSources() {
17+
return (tree) => {
18+
visit(tree, matcher, (node) => {
19+
const newSrc = getNewSrc(node)
20+
if (newSrc) {
21+
node.properties.src = newSrc
22+
}
23+
})
24+
}
25+
}
26+
27+
function getNewSrc(node) {
28+
const { src } = node.properties
29+
if (!src.startsWith('/')) return
30+
31+
try {
32+
const filePath = src.slice(1)
33+
const stats = fs.statSync(filePath)
34+
// The size is not perfect but it's good enough. The size is
35+
// very fast to pick up without needing to do a deep hashing of the
36+
// image's content. It's perfectly possible that someone edits an
37+
// image and it's size doesn't change. Although very unlikely.
38+
// The slightest change to the image is more likely to either increase
39+
// or decrease the image size by at least 1 byte.
40+
// Also, because of this limitation, we're not confident to cache the
41+
// image more than say 24h. But in the unlikely event that someone does
42+
// edit an image and the size doesn't change, there's always the
43+
// escape hatch that you can soft-purge all manual CDN surrogate keys.
44+
if (!stats.size) return
45+
const hash = `${stats.size}`
46+
const split = src.split('/')
47+
split.splice(2, 0, `cb-${hash}`)
48+
return split.join('/')
49+
} catch (err) {
50+
console.warn(`Error trying to get a hash for ${src}`, err)
51+
}
52+
}

lib/rewrite-local-links.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ function getNewHref(link, languageCode, version) {
2626
const href = link.attr('href')
2727

2828
// Exceptions to link rewriting
29-
if (href.startsWith('/assets')) return
30-
if (href.startsWith('/public')) return
29+
if (href.startsWith('/assets/')) return
30+
if (href.startsWith('/public/')) return
3131
if (Object.keys(externalRedirects).includes(href)) return
3232

3333
let newHref = href

middleware/asset-preprocessing.js

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// This middleware rewrites the URL of requests that contain the
2+
// portion of `/cb-\d+/`.
3+
// "cb" stands for "cache bust".
4+
// There's a Markdown plugin that rewrites all <img src> values
5+
// from `<img src="/assets/foo/bar.png">` to
6+
// `<img src="/assets/cb-123467/foo/bar.png">` for example.
7+
// We're doing this so that we can set a much more aggressive
8+
// Cache-Control for assets and a CDN surrogate-key that doesn't
9+
// soft-purge on every deployment.
10+
11+
const regex = /\/cb-\d+\//
12+
13+
export default function assetPreprocessing(req, res, next) {
14+
if (req.path.startsWith('/assets/')) {
15+
// We're only confident enough to set the *manual* surrogate key if the
16+
// asset contains the cache-busting piece.
17+
if (regex.test(req.url)) {
18+
// We remove it so that when `express.static()` runs, it can
19+
// find the file on disk by its original name.
20+
req.url = req.url.replace(regex, '/')
21+
// The Cache-Control is managed by the configuration
22+
// for express.static() later in the middleware.
23+
}
24+
}
25+
return next()
26+
}

middleware/index.js

+16-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@ import csrf from './csrf.js'
1414
import handleCsrfErrors from './handle-csrf-errors.js'
1515
import compression from 'compression'
1616
import disableCachingOnSafari from './disable-caching-on-safari.js'
17-
import setDefaultFastlySurrogateKey from './set-fastly-surrogate-key.js'
17+
import {
18+
setDefaultFastlySurrogateKey,
19+
setManualFastlySurrogateKey,
20+
} from './set-fastly-surrogate-key.js'
1821
import setFastlyCacheHeaders from './set-fastly-cache-headers.js'
1922
import catchBadAcceptLanguage from './catch-bad-accept-language.js'
2023
import reqUtils from './req-utils.js'
@@ -57,6 +60,7 @@ import featuredLinks from './featured-links.js'
5760
import learningTrack from './learning-track.js'
5861
import next from './next.js'
5962
import renderPage from './render-page.js'
63+
import assetPreprocessing from './asset-preprocessing.js'
6064

6165
const { NODE_ENV } = process.env
6266
const isDevelopment = NODE_ENV === 'development'
@@ -95,17 +99,25 @@ export default function (app) {
9599
instrument(archivedEnterpriseVersionsAssets, './archived-enterprise-versions-assets')
96100
)
97101
)
102+
// This must come before the express.static('assets') middleware.
103+
app.use(assetPreprocessing)
104+
// By specifying '/assets/cb-' and not just '/assets/' we
105+
// avoid possibly legacy enterprise assets URLs and asset image URLs
106+
// that don't have the cache-busting piece in it.
107+
app.use('/assets/cb-', setManualFastlySurrogateKey)
98108
app.use(
99-
'/assets',
109+
'/assets/',
100110
express.static('assets', {
101111
index: false,
102112
etag: false,
103113
lastModified: false,
104-
maxAge: '1 day', // Relatively short in case we update images
114+
// Can be aggressive because images inside the content get unique
115+
// URLs with a cache busting prefix.
116+
maxAge: '7 days',
105117
})
106118
)
107119
app.use(
108-
'/public',
120+
'/public/',
109121
express.static('data/graphql', {
110122
index: false,
111123
etag: false,

middleware/set-fastly-surrogate-key.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,12 @@ export function setFastlySurrogateKey(res, enumKey) {
2323
res.set(KEY, enumKey)
2424
}
2525

26-
export default function setDefaultFastlySurrogateKey(req, res, next) {
26+
export function setManualFastlySurrogateKey(req, res, next) {
27+
res.set(KEY, SURROGATE_ENUMS.MANUAL)
28+
return next()
29+
}
30+
31+
export function setDefaultFastlySurrogateKey(req, res, next) {
2732
res.set(KEY, SURROGATE_ENUMS.DEFAULT)
2833
return next()
2934
}

script/rendered-content-link-checker.mjs

+7-1
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,13 @@ async function processPermalink(permalink, page, pageMap, redirects, opts) {
240240

241241
if (checkImages) {
242242
$('img[src]').each((i, img) => {
243-
const { src } = img.attribs
243+
let { src } = img.attribs
244+
245+
// Images get a cache-busting prefix injected in the image
246+
// E.g. <img src="/assets/cb-123456/foo/bar.png">
247+
// We need to remove that otherwise we can't look up the image
248+
// on disk.
249+
src = src.replace(/\/cb-\d+\//, '/')
244250

245251
if (globalImageSrcCheckCache.has(src)) {
246252
globalCacheHitCount++

tests/rendering/server.js

+39-8
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ describe('server', () => {
375375
})
376376

377377
describe('image asset paths', () => {
378+
const localImageCacheBustBasePathRegex = /^\/assets\/cb-\d+\/images\//
378379
const localImageBasePath = '/assets/images'
379380
const legacyImageBasePath = '/assets/enterprise'
380381
const latestEnterprisePath = `/en/enterprise/${enterpriseServerReleases.latest}`
@@ -384,7 +385,10 @@ describe('server', () => {
384385
const $ = await getDOM(
385386
'/en/github/authenticating-to-github/configuring-two-factor-authentication'
386387
)
387-
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
388+
const imageSrc = $('img').first().attr('src')
389+
expect(
390+
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
391+
).toBe(true)
388392
})
389393

390394
test('github articles on GHE have images that point to local assets dir', async () => {
@@ -393,7 +397,9 @@ describe('server', () => {
393397
)
394398
const imageSrc = $('img').first().attr('src')
395399
expect(
396-
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
400+
localImageCacheBustBasePathRegex.test(imageSrc) ||
401+
imageSrc.startsWith(localImageBasePath) ||
402+
imageSrc.startsWith(legacyImageBasePath)
397403
).toBe(true)
398404
})
399405

@@ -403,7 +409,9 @@ describe('server', () => {
403409
)
404410
const imageSrc = $('img').first().attr('src')
405411
expect(
406-
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
412+
localImageCacheBustBasePathRegex.test(imageSrc) ||
413+
imageSrc.startsWith(localImageBasePath) ||
414+
imageSrc.startsWith(legacyImageBasePath)
407415
).toBe(true)
408416
})
409417

@@ -413,7 +421,9 @@ describe('server', () => {
413421
)
414422
const imageSrc = $('img').first().attr('src')
415423
expect(
416-
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
424+
localImageCacheBustBasePathRegex.test(imageSrc) ||
425+
imageSrc.startsWith(localImageBasePath) ||
426+
imageSrc.startsWith(legacyImageBasePath)
417427
).toBe(true)
418428
})
419429

@@ -428,14 +438,20 @@ describe('server', () => {
428438
const $ = await getDOM(
429439
'/en/enterprise-cloud@latest/billing/managing-billing-for-your-github-account/viewing-the-subscription-and-usage-for-your-enterprise-account'
430440
)
431-
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
441+
const imageSrc = $('img').first().attr('src')
442+
expect(
443+
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
444+
).toBe(true)
432445
})
433446

434447
test('admin articles on GHEC have images that point to local assets dir', async () => {
435448
const $ = await getDOM(
436449
'/en/enterprise-cloud@latest/admin/configuration/configuring-your-enterprise/verifying-or-approving-a-domain-for-your-enterprise'
437450
)
438-
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
451+
const imageSrc = $('img').first().attr('src')
452+
expect(
453+
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
454+
).toBe(true)
439455
})
440456

441457
test('github articles on GHAE have images that point to local assets dir', async () => {
@@ -444,13 +460,18 @@ describe('server', () => {
444460
)
445461
const imageSrc = $('img').first().attr('src')
446462
expect(
447-
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
463+
localImageCacheBustBasePathRegex.test(imageSrc) ||
464+
imageSrc.startsWith(localImageBasePath) ||
465+
imageSrc.startsWith(legacyImageBasePath)
448466
).toBe(true)
449467
})
450468

451469
test('admin articles on GHAE have images that point to local assets dir', async () => {
452470
const $ = await getDOM('/en/github-ae@latest/admin/user-management/managing-dormant-users')
453-
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
471+
const imageSrc = $('img').first().attr('src')
472+
expect(
473+
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
474+
).toBe(true)
454475
})
455476
})
456477

@@ -1003,6 +1024,16 @@ describe('static routes', () => {
10031024
expect(res.headers['surrogate-key']).toBeTruthy()
10041025
})
10051026

1027+
it('rewrites /assets requests from a cache-busting prefix', async () => {
1028+
// The rewrite-asset-urls.js Markdown plugin will do this to img tags.
1029+
const res = await get('/assets/cb-123456/images/site/be-social.gif')
1030+
expect(res.statusCode).toBe(200)
1031+
expect(res.headers['set-cookie']).toBeUndefined()
1032+
expect(res.headers['cache-control']).toContain('public')
1033+
expect(res.headers['cache-control']).toMatch(/max-age=\d+/)
1034+
expect(res.headers['surrogate-key']).toBeTruthy()
1035+
})
1036+
10061037
it('serves schema files from the /data/graphql directory at /public', async () => {
10071038
const res = await get('/public/schema.docs.graphql')
10081039
expect(res.statusCode).toBe(200)

0 commit comments

Comments
 (0)