Skip to content

Commit 07578e7

Browse files
authored
feat(husky): check deletions and broken fragments in URLs (mdn#31265)
* feat(husky): check deletions and broken fragments in URLs * mention it's the fragment in url
1 parent ecce8d4 commit 07578e7

File tree

7 files changed

+230
-3
lines changed

7 files changed

+230
-3
lines changed
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
name: Check URL issues
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
paths:
8+
- "files/**/*.md"
9+
10+
jobs:
11+
check_url_issues:
12+
#if: github.repository == 'mdn/content'
13+
runs-on: ubuntu-latest
14+
15+
steps:
16+
- uses: actions/checkout@v4
17+
with:
18+
fetch-depth: 0
19+
20+
- name: Setup Node.js environment
21+
uses: actions/setup-node@v4
22+
with:
23+
node-version-file: ".nvmrc"
24+
cache: yarn
25+
26+
- name: Check URL deletions and broken fragments
27+
run: |
28+
echo "::add-matcher::.github/workflows/url-issues-problem-matcher.json"
29+
git fetch origin main
30+
node scripts/log-url-issues.js --workflow
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"problemMatcher": [
3+
{
4+
"owner": "log-url-issues",
5+
"severity": "error",
6+
"pattern": [
7+
{
8+
"regexp": "^(ERROR|WARN|INFO):(.+):(\\d+):(\\d+):(.+)$",
9+
"severity": 1,
10+
"file": 2,
11+
"line": 3,
12+
"column": 4,
13+
"message": 5
14+
}
15+
]
16+
}
17+
]
18+
}

.lintstagedrc.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@
77
"prettier --write"
88
],
99
"tests/**/*.*": "yarn test:front-matter-linter",
10-
"*.{svg,png,jpeg,jpg,gif}": "yarn filecheck"
10+
"*.{svg,png,jpeg,jpg,gif}": "yarn filecheck",
11+
"*": "node scripts/log-url-issues.js"
1112
}

files/en-us/mdn/writing_guidelines/page_structures/syntax_sections/index.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ new Date(year, monthIndex, day, hours, minutes, seconds, milliseconds)
102102

103103
##### Formal syntax
104104

105-
Formal syntax notation (using [BNF](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form)) should not be used in the Syntax section — instead use the expanded multiple-line format [described above](multiple_linesoptional_parameters).
105+
Formal syntax notation (using [BNF](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form)) should not be used in the Syntax section — instead use the expanded multiple-line format [described above](#multiple_linesoptional_parameters).
106106

107107
While the formal notation provides a concise mechanism for describing complex syntax, it is not familiar to many developers, and can _conflict_ with valid syntax for particular programming languages. For example, "`[ ]`" indicates both an "optional parameter" and a JavaScript {{jsxref("Array")}}. You can see this in the formal syntax for {{jsxref("Array.prototype.slice()")}} below:
108108

files/en-us/mozilla/add-ons/webextensions/api/runtime/setuninstallurl/index.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ browser-compat: webextensions.api.runtime.setUninstallURL
77

88
{{AddonSidebar}}
99

10-
Sets the URL to be visited when the extension is uninstalled. This can be used to clean up server-side data, do analytics, or implement surveys. The URL can be up to 1023 characters. This limit used to be 255, see [Browser compatibility](browser_compatibility) for more details.
10+
Sets the URL to be visited when the extension is uninstalled. This can be used to clean up server-side data, do analytics, or implement surveys. The URL can be up to 1023 characters. This limit used to be 255, see [Browser compatibility](#browser_compatibility) for more details.
1111

1212
This is an asynchronous function that returns a [`Promise`](/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise).
1313

scripts/log-url-issues.js

+140
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* This script logs the locations of URLs that are affected by the following changes:
3+
* - file deletion
4+
* - Markdown header updates
5+
*/
6+
7+
import fs from "node:fs/promises";
8+
import path from "node:path";
9+
import {
10+
execGit,
11+
getRootDir,
12+
walkSync,
13+
isImagePath,
14+
getLocations,
15+
IMG_RX,
16+
stringToFragment,
17+
} from "./utils.js";
18+
19+
// Directory reported by getRootDir() (imported from ./utils.js).
const rootDir = getRootDir();
// Slugs of deleted pages/images; filled by getDeletedSlugs().
const deletedSlugs = [];
// "<slug>#<fragment>" entries for removed headers; filled by getFragmentDetails().
const fragmentDetails = [];
// Set to false by the scan at the bottom of the script; a false value makes
// the script exit with status 1.
let isAllOk = true;
24+
25+
/**
 * Collects the slugs of deleted English pages and images and appends them to
 * the module-level `deletedSlugs` array.
 *
 * Reads `git status --short --porcelain`, keeps the deletion entries ("D")
 * that mention files/en-us and are either images (per IMG_RX) or index.md
 * pages, and strips the "files/en-us/" prefix and the "/index.md" suffix
 * from each path. The resulting list is also logged to stdout.
 */
function getDeletedSlugs() {
  // git status --short --porcelain
  const result = execGit(["status", "--short", "--porcelain"], { cwd: "." });

  if (result.trim()) {
    deletedSlugs.push(
      ...result
        .split("\n")
        .filter(
          (line) =>
            /^\s*D\s+/i.test(line) &&
            line.includes("files/en-us") &&
            // IMG_RX is a shared object defined outside this script. If it
            // carries the g/y flag, .test() would resume from the previous
            // match and skip every other image; search() always starts at 0
            // and leaves lastIndex untouched.
            (line.search(IMG_RX) !== -1 || line.includes("index.md")),
        )
        .map((line) => line.replaceAll(/^\s*|files\/en-us\/|\/index\.md/gm, ""))
        // "D <slug>" -> "<slug>"
        .map((line) => line.split(/\s+/)[1]),
    );
  }
  console.log("deletedSlugs", deletedSlugs);
}
45+
46+
/**
 * Collects the URL fragments that stop existing because a Markdown header was
 * removed or renamed, and appends them to the module-level `fragmentDetails`
 * array as "<slug>#<fragment>" strings. The resulting list is also logged.
 *
 * Only diff segments for files/en-us/<slug>/index.md are considered, so
 * removed "# ..." lines in other files (YAML or shell comments, other
 * locales) are not mistaken for headers. A header that is removed and added
 * again within the same file is skipped because its fragment still exists.
 *
 * @param {boolean} [fromStaging=true] - true: diff the working tree (staged
 *   and unstaged changes) against HEAD; false: diff HEAD against its merge
 *   base with origin/main.
 */
function getFragmentDetails(fromStaging = true) {
  const diffArgs = fromStaging
    ? ["diff", "HEAD"] // staged and unstaged changes
    : ["diff", "origin/main...HEAD"]; // branch base to HEAD
  const result = execGit(diffArgs, { cwd: "." });

  if (result.trim()) {
    const contentPrefix = "files/en-us/";
    const pageSuffix = "/index.md";

    // Header lines of one kind ("-" removed, "+" added) -> fragment ids.
    const toFragments = (segment, headerRx) =>
      [...segment.matchAll(headerRx)].map((match) =>
        stringToFragment(match[0].toLowerCase().replace(/^[-+]#+ /, "")),
      );

    const segments = result
      .split("diff --git a/")
      .filter((segment) => segment !== "");

    for (const segment of segments) {
      // Each segment starts with "<path> b/<path>".
      const filePath = segment.substring(0, segment.indexOf(" "));
      if (
        !filePath.startsWith(contentPrefix) ||
        !filePath.endsWith(pageSuffix)
      ) {
        continue;
      }
      const slug = filePath.slice(contentPrefix.length, -pageSuffix.length);

      const addedFragments = new Set(toFragments(segment, /^\+#+ .*$/gm));
      for (const fragment of toFragments(segment, /^-#+ .*$/gm)) {
        if (!addedFragments.has(fragment)) {
          fragmentDetails.push(`${slug}#${fragment}`);
        }
      }
    }
  }
  console.log("fragmentDetails", fragmentDetails);
}
79+
80+
// Escapes regular-expression metacharacters so that a slug or fragment is
// matched literally when embedded in a RegExp.
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

if (process.argv[2] !== "--workflow") {
  // Local run: inspect the working tree for deletions and header changes.
  getDeletedSlugs();
  getFragmentDetails();
} else {
  // Workflow run: only header changes relative to origin/main are collected;
  // deleted slugs are not gathered in this mode.
  getFragmentDetails(false);
}

if (deletedSlugs.length < 1 && fragmentDetails.length < 1) {
  console.log("Nothing to check. 🎉");
  process.exit(0);
}

// Matchers are built once instead of once per scanned file.
// A deleted slug must be followed by a character that ends a link target.
const deletedSlugMatchers = deletedSlugs.map((slug) => ({
  slug,
  rx: new RegExp(`/${escapeRegExp(slug)}[)># "']`, "mig"),
}));

const fragmentMatchers = fragmentDetails
  .map((fragment) => {
    const [slug, id = ""] = fragment.split("#");
    return { fragment, slug, id };
  })
  // An empty id would match every link to the page, so it is skipped.
  .filter(({ id }) => id !== "")
  .map(({ fragment, slug, id }) => ({
    fragment,
    // Page that owned the removed header.
    ownPage: `files/en-us/${slug}/index.md`,
    // "<slug>#<id>" anywhere in the content.
    anywhereRx: new RegExp(escapeRegExp(fragment), "mig"),
    // Bare "#<id>" links; the lookbehind leaves "<path>#<id>" links to
    // anywhereRx so they are not reported twice.
    samePageRx: new RegExp(`(?<![\\w/])#${escapeRegExp(id)}`, "mig"),
  }));

for await (const filePath of walkSync(getRootDir())) {
  if (!filePath.endsWith("index.md")) {
    continue;
  }
  try {
    const content = await fs.readFile(filePath, "utf-8");
    // Normalize separators so the path comparison below also works where the
    // platform separator is not "/".
    const posixPath = filePath.split(path.sep).join("/");
    const relativePath = posixPath.substring(posixPath.indexOf("files/en-us"));

    // check deleted links
    for (const { slug, rx } of deletedSlugMatchers) {
      for (const location of getLocations(content, rx)) {
        // Only an actual reference makes the run fail.
        isAllOk = false;
        console.error(
          `ERROR:${relativePath}:${location.line}:${location.column}:Slug '${slug}' has been deleted`,
        );
      }
    }

    // check broken URL fragment
    for (const { fragment, ownPage, anywhereRx, samePageRx } of fragmentMatchers) {
      const locations = getLocations(content, anywhereRx);
      // check fragments in the same file
      if (relativePath === ownPage) {
        locations.push(...getLocations(content, samePageRx));
      }
      for (const location of locations) {
        isAllOk = false;
        console.error(
          `ERROR:${relativePath}:${location.line}:${location.column}:URL fragment in URL '${fragment}' is broken`,
        );
      }
    }
  } catch (e) {
    console.error(`Error processing ${filePath}: ${e.message}`);
    throw e;
  }
}

if (!isAllOk) {
  process.exit(1);
}

scripts/utils.js

+38
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,41 @@ export function getRootDir() {
5050
/**
 * Tells whether `path` matches the image pattern IMG_RX.
 *
 * @param {string} path - File path to check.
 * @returns {boolean} true when IMG_RX matches the path.
 */
export function isImagePath(path) {
  // IMG_RX is a shared object declared outside this excerpt. search() always
  // scans from index 0 and does not touch lastIndex, so repeated calls give
  // the same answer even if the pattern carries the g/y flag (with which
  // .test() would resume from the previous match position).
  return String(path).search(IMG_RX) !== -1;
}
53+
54+
/**
 * Returns the locations (1-based line and column numbers) of every occurrence
 * of 'searchValue' in the given 'content', in document order.
 *
 * @param {string} content - Text to search.
 * @param {string|RegExp} searchValue - A RegExp is used as given and must
 *   carry the `g` flag (String.prototype.matchAll requires it). Any other
 *   value is converted to a string and matched literally, case-insensitively.
 * @returns {{line: number, column: number}[]} One entry per match.
 */
export function getLocations(content, searchValue) {
  const searchRx =
    searchValue instanceof RegExp
      ? searchValue
      : // Escape metacharacters so the text is searched for literally.
        new RegExp(
          String(searchValue).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"),
          "mig",
        );

  // Offset of the first character of each line.
  const lineStarts = [0];
  for (
    let newline = content.indexOf("\n");
    newline !== -1;
    newline = content.indexOf("\n", newline + 1)
  ) {
    lineStarts.push(newline + 1);
  }

  const positions = [];
  let lineIndex = 0;
  // matchAll yields matches by increasing index, so the line cursor only ever
  // moves forward.
  for (const { index } of content.matchAll(searchRx)) {
    while (
      lineIndex + 1 < lineStarts.length &&
      lineStarts[lineIndex + 1] <= index
    ) {
      lineIndex += 1;
    }
    positions.push({
      line: lineIndex + 1,
      // A match on the first character of a line is column 1.
      column: index - lineStarts[lineIndex] + 1,
    });
  }
  return positions;
}
81+
82+
/*
 * Turns the text of a Markdown header into a URL fragment: surrounding
 * whitespace is trimmed, the punctuation listed in the pattern below is
 * dropped, and each run of whitespace becomes a single underscore.
 */
export function stringToFragment(text) {
  const trimmed = text.trim();
  const withoutPunctuation = trimmed.replace(
    /["#$%&+,/:;=?@[\]^`{|}~')(\\]/g,
    "",
  );
  return withoutPunctuation.replace(/\s+/g, "_");
}

0 commit comments

Comments
 (0)