-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathexport-minutes.html
346 lines (313 loc) · 10.8 KB
/
export-minutes.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
<!DOCTYPE html>
<!--
This is a tool to convert the minutes from Google Docs to Github-flavored markdown.
It is designed for use with https://github.com/w3c/webextensions
and only supports the (standard Google Docs) syntax from
https://docs.google.com/document/d/1QkwhEMtMS67JBUkl_WVPZ4lRSKoWcQNlLJSf_GwSXg8/edit
Questions? Ask [email protected]
-->
<head>
<meta charset="utf-8">
<title>WECG minutes converter - from Google Docs to Markdown</title>
<style>
html, body {
height: 100%;
margin: 0;
padding: 0;
}
body {
display: flex;
flex-direction: column;
}
#extraInfoOutput {
white-space: pre-wrap;
height: 8em;
overflow-y: auto;
}
#input, #output {
flex: 1;
overflow: auto;
background: lightgrey;
}
</style>
</head>
<body>
<div>Select the text in Google Docs and Paste the contents below:</div>
<div id="input" contenteditable></div>
<div>
<input type="button" id="convert" value="Convert above paste from Google Doc to (Github-flavored) markdown">
</div>
<div id="extraInfoOutput"></div>
<textarea id="output" placeholder="Markdown output appears here"></textarea>
<script>
// Page elements used by the converter UI.
const input = document.getElementById("input");
const output = document.getElementById("output");
const extraInfoOutput = document.getElementById("extraInfoOutput");
const convert = document.getElementById("convert");

// Click handler: converts the pasted content to markdown, shows it in the
// output textarea and prints a summary of the issues/PRs that were found.
convert.onclick = () => {
  const markdownText = convertToMarkdown(input);
  output.value = markdownText;

  // Collect the numbers of linked issues and pull requests. Sets keep the
  // order of first appearance and drop duplicates.
  const issues = new Set();
  const prs = new Set();
  const linkPattern = /https:\/\/github\.com\/w3c\/webextensions\/(issues|pull)\/(\d+)/g;
  for (const [, issueOrPr, issueNr] of markdownText.matchAll(linkPattern)) {
    const bucket = issueOrPr === "pull" ? prs : issues;
    bucket.add(issueNr);
  }

  // Collect "issue N" mentions whose number was not linked anywhere.
  const mentionedWithoutLink = new Set();
  for (const [, issueNr] of markdownText.matchAll(/\sissue (\d+)/gi)) {
    const isLinked = issues.has(issueNr) || prs.has(issueNr);
    if (!isLinked) {
      mentionedWithoutLink.add(issueNr);
    }
  }

  // Formats a set of numbers as "#1, #2", or "-" when the set is empty.
  const serializeIssues = (issueNrs) =>
    [...issueNrs].map((issueNr) => `#${issueNr}`).join(", ") || "-";

  let report = [
    "",
    "List of issues/PRs in order of appearance in the input:",
    `- Issues: ${serializeIssues(issues)}`,
    `- PRs: ${serializeIssues(prs)}`,
    `- Mentioned issues without link to issue: ${serializeIssues(mentionedWithoutLink)}`,
  ].join("\n");

  if (markdownText.includes("```")) {
    // Every code block contributes an opening and a closing fence.
    const codeBlockCount = markdownText.match(/```/g).length / 2;
    report += `\nWARNING: ${codeBlockCount} code blocks (\`\`\`) found. You should verify the rendered output!`;
  }
  extraInfoOutput.textContent = report;
};
/**
 * Converts pasted Google Docs content to (Github-flavored) markdown.
 *
 * This formatter does the following:
 * - Apply code formatting.
 * - Replace < with &lt;
 * - Replace * and _ with \* and \_.
 * - Replace boldfaced with **xx**
 * - Replace italic with _xx_
 * - Replace links with [text](anchor)
 * - Replace h1, h2, h3, h4 with #, ##, ### and ####
 * - Format h1 header for consistency.
 * - Replace ol,ul and li with correctly indented list items.
 * - Fixup whitespace.
 *
 * @param elemRootInput DOM element holding the pasted content. It is cloned;
 *     the element itself is not modified.
 * @returns {string} The markdown text.
 */
function convertToMarkdown(elemRootInput) {
  // All transformations below mutate a deep clone of the input.
  const root = elemRootInput.cloneNode(true);

  // Apply code formatting first, before escaping characters.
  // To avoid interference by transformations below, the code is replaced
  // with placeholders, which we should restore in the end.
  const { finalRestoreCodeBlocks } = replaceAllCodeBlocks(root);

  // Escape < to avoid rendering as HTML.
  replaceAllInTextNodes(root, "<", "&lt;");

  // Escape every _ and * that touches whitespace or the start/end of a text
  // node, to avoid undesired formatting.
  replaceAllInTextNodes(root, /(?<=\s|^)[*_]|[*_](?=\s|$)/g, "\\$&");

  // Apply boldfaced appearance.
  for (const b of root.querySelectorAll(`span[style*="font-weight:700"]`)) {
    b.prepend("**");
    b.append("**");
  }

  // Apply italic appearance.
  for (const i of root.querySelectorAll(`span[style*="font-style:italic"]`)) {
    i.prepend("_");
    i.append("_");
  }

  // Render links. Links whose text already equals the URL are left as-is.
  for (const a of root.querySelectorAll("a[href]")) {
    if (a.href === a.textContent.trim()) {
      continue;
    }
    // A literal ")" would terminate the markdown link target early.
    const href = a.href.replaceAll(")", "%29");
    a.prepend("[");
    a.append(`](${href})`);
  }

  // Format headers.
  for (const h of root.querySelectorAll("h1")) {
    // Replace header:
    // WECG Meetings 2021, Public Notes—Oct 28, 2021
    // WECG Meetings 2021, Public Notes, Oct 28
    replaceAllInTextNodes(
      h,
      /(WECG Meetings \d{4}, Public Notes)—([A-Za-z]+ \d{1,2}), \d{4}/g,
      "$1, $2"
    );
    h.prepend(`\n# `);
  }
  for (const h of root.querySelectorAll("h2")) {
    h.prepend(`\n## `);
  }
  for (const h of root.querySelectorAll("h3")) {
    h.prepend(`\n### `);
  }
  for (const h of root.querySelectorAll("h4")) {
    h.prepend(`\n#### `);
  }

  for (const li of root.querySelectorAll("li")) {
    // Nesting level = number of ol/ul ancestors between the item and root.
    let level = 0;
    for (let parentNode = li.parentNode; parentNode !== root; parentNode = parentNode.parentNode) {
      if (parentNode.tagName === "OL" || parentNode.tagName === "UL") {
        ++level;
      }
    }
    const listItems = Array.from(li.parentNode.children).filter(e => e.tagName === "LI");
    const listIndex = listItems.indexOf(li) + 1;

    // Top-level (level 1) has no extra indentation, other levels 2 spaces per level.
    // Two spaces are required: with a single space the nested marker does not
    // reach the content column of its " * " parent and is not rendered nested.
    let prefix = "  ".repeat(level - 1);
    if (li.parentNode.tagName === "OL") {
      prefix += ` ${listIndex}. `;
    } else {
      prefix += " * ";
    }
    li.prepend(prefix);

    // The structure is li > p > span, with the span containing the line content.
    // When the li contains multiple lines (e.g. shift-Enter in Google docs),
    // there may be multiple span elements (potentially containing just a single <br>)
    for (const br of li.querySelectorAll(":scope > p > span > br")) {
      // Indent the (text) content at the next span.
      if (br.parentNode.nextElementSibling) {
        br.after(" ".repeat(prefix.length));
      }
    }

    const isNewList = li.parentNode.previousElementSibling?.tagName !== li.parentNode.tagName;
    if (level === 1 && listIndex === 1 && isNewList) {
      // Insert blank line before top-level list.
      li.before("\n");
    }
  }

  // Forced line break after every paragraph and br.
  for (const elem of root.querySelectorAll("p, br")) {
    elem.after("\n");
  }

  // Blank line after every header.
  for (const elem of root.querySelectorAll("h1,h2,h3,h4")) {
    elem.after("\n\n");
  }

  let textContent = root.textContent;

  // Normalize ’ to '.
  textContent = textContent.replaceAll("’", "'");

  // Normalize non-breaking whitespace to regular whitespace.
  textContent = textContent.replaceAll("\xA0", " ");

  // Docs sometimes appends a space to a link even if not in the source text. Strip it
  textContent = textContent.replaceAll(/ +(\]\([^)\n]+\)) */g, "$1 ");

  // Trim trailing whitespace.
  textContent = textContent.replaceAll(/ +$/gm, "");

  // Remove consecutive line breaks to at most one empty line.
  // May happen if header is followed by enumeration.
  textContent = textContent.replace(/(\n\n)\n+/g, "$1");

  // Each section header has two blank lines in front of it.
  textContent = textContent.replace(/^(?=#+ )/gm, "\n");

  // Trim leading and trailing whitespace of the whole document.
  textContent = textContent.trim();

  // Swap the code placeholders back for the actual code.
  textContent = finalRestoreCodeBlocks(textContent);

  return textContent;
}
/**
 * Runs `replaceAll(pattern, replacement)` over the text nodes below |root|.
 *
 * When a text node contains an even number of backticks, the text between
 * each pair of backticks is treated as inline code and left untouched; only
 * the text outside the backticks is rewritten. When the number of backticks
 * is odd, the whole node value is rewritten.
 *
 * Changed text nodes are replaced by newly created text nodes once the
 * traversal has finished.
 *
 * @param root Node whose text node descendants are processed.
 * @param pattern String or global RegExp, passed to String.prototype.replaceAll.
 * @param replacement Replacement passed to String.prototype.replaceAll.
 */
function replaceAllInTextNodes(root, pattern, replacement) {
  const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
  const pendingUpdates = [];

  let textNode;
  while ((textNode = walker.nextNode())) {
    const before = textNode.nodeValue;
    const segments = before.split("`");
    // N backticks yield N + 1 segments, so an odd count means balanced backticks.
    const hasBalancedBackticks = segments.length % 2 === 1;

    let after;
    if (hasBalancedBackticks) {
      after = segments
        .map((segment, index) => {
          // Odd segments sit between a pair of backticks: keep code verbatim.
          const isInsideBackticks = index % 2 === 1;
          return isInsideBackticks ? segment : segment.replaceAll(pattern, replacement);
        })
        .join("`");
    } else {
      after = before.replaceAll(pattern, replacement);
    }

    if (after !== before) {
      pendingUpdates.push({ textNode, after });
    }
  }

  // Apply the collected changes only after the walk is complete.
  pendingUpdates.forEach(({ textNode: staleNode, after }) => {
    staleNode.parentNode.replaceChild(document.createTextNode(after), staleNode);
  });
}
/**
 * Replaces code elements in |root| with placeholders.
 *
 * Monospace-styled spans are detected as code. Each run of code is collapsed
 * into a single element whose text is a unique placeholder; the markdown for
 * the code (wrapped in ` or ```) is remembered and put back by the returned
 * `finalRestoreCodeBlocks` function. This keeps later text transformations
 * from mangling the code.
 *
 * @param root Root node of the (cloned) pasted content. Modified in place.
 * @param getPlaceholder Ignored; placeholders are always generated internally.
 *     The parameter is only kept so that the signature stays unchanged.
 * @returns {{finalRestoreCodeBlocks: function(string): string}} Object with
 *     the function that swaps placeholders in a string for the code markdown.
 */
function replaceAllCodeBlocks(root, getPlaceholder) {
  const CODE_ATTR = "this_is_really_a_code_block";
  const CODE_SELECTOR = `[${CODE_ATTR}]`;

  // To prevent code blocks from being affected by text-based transformations
  // in the end, replace the text with placeholders.
  const codeTexts = new Map();
  let nextCodeId = 1000;

  // Registers |markdown| and returns the unique placeholder standing in for it.
  function createPlaceholder(markdown) {
    // Assuming that minutes will never contain MINUTE_PLACEHOLDER_.
    const placeholder = `^^^MINUTE_PLACEHOLDER_${nextCodeId++}===`;
    codeTexts.set(placeholder, markdown);
    return placeholder;
  }

  // Replaces all known placeholders in |txt| with the registered markdown.
  function restorePlaceholders(txt) {
    return txt.replace(
      /\^\^\^MINUTE_PLACEHOLDER_\d+===/g,
      // Unknown placeholder-like text is left alone instead of becoming "undefined".
      placeholder => codeTexts.get(placeholder) ?? placeholder
    );
  }

  // First pass: Detect code lines (possibly multiline code) and inline code.
  for (const c of root.querySelectorAll(`span[style*="font-family"][style*="monospace"]`)) {
    if (!c.style.fontFamily.includes("monospace")) {
      continue;
    }
    if (c.closest(CODE_SELECTOR)) {
      // Already processed (determined that parent is code block).
      continue;
    }
    const parent = c.parentNode;
    const isWholeParagraphCode =
      parent.tagName === "P" &&
      !parent.querySelector(`span[style*="font-family"]:not([style*="monospace"])`);
    if (isWholeParagraphCode) {
      // Part of code block.
      parent.setAttribute(CODE_ATTR, "");
    } else {
      // Has siblings that are not code.
      c.setAttribute(CODE_ATTR, "");
    }
  }

  // Second pass: Collapse multiline code with ```, use ` otherwise.
  for (const c of root.querySelectorAll(CODE_SELECTOR)) {
    if (!root.contains(c)) {
      // Already processed and remove()d below.
      continue;
    }

    // Gather c plus all directly following siblings that are code or br.
    const codeNodes = [];
    for (
      let nod = c;
      nod?.matches?.(`${CODE_SELECTOR},br`);
      nod = nod.nextSibling
    ) {
      codeNodes.push(nod);
    }

    let codeText = "";
    for (const nod of codeNodes) {
      // br can be top-level, sole child of p, or wrapped in span.
      for (const br of nod.querySelectorAll("br")) {
        br.replaceWith("\n");
      }
      codeText += nod.textContent;
      if (nod.tagName === "P" || nod.tagName === "BR") {
        codeText += "\n";
      }
    }
    codeText = codeText.replace(/\n+$/, "");

    const markdown = codeText.trim().includes("\n")
      ? "```\n" + codeText + "\n```"
      : "`" + codeText + "`";

    // Replace actual content with placeholder to prevent other logic such as
    // the link wrapping / text replacement logic from mangling the code block.
    // The placeholder must stay in the DOM until finalRestoreCodeBlocks runs.
    c.textContent = createPlaceholder(markdown);

    // codeNodes[0] === c; remove all except c.
    codeNodes.slice(1).forEach(nod => nod.remove());
  }

  function finalRestoreCodeBlocks(textContent) {
    return restorePlaceholders(textContent);
  }

  return { finalRestoreCodeBlocks };
}
</script>
</body>
</html>