Skip to content

Commit bb31930

Browse files
committed
fix: get internal links working
1 parent c314bf8 commit bb31930

File tree

5 files changed

+48
-26
lines changed

5 files changed

+48
-26
lines changed

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"notion-download": "node dist/index.js",
1515
"cmdhelp": "ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts",
1616
"// test out with a private sample notion db": "",
17-
"pull-test": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
17+
"pull-test": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
1818
"// test with a semi-stable/public site:": "",
1919
"pull-sample": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --locales en,es,fr,de --log-level verbose",
2020
"pull-sample-with-paths": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --img-output-path ./sample_img"

src/HierarchicalNamedLayoutStrategy.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ export class HierarchicalNamedLayoutStrategy extends LayoutStrategy {
1919
context: string,
2020
levelLabel: string
2121
): string {
22-
const path = context + "/" + sanitize(levelLabel);
22+
const path = context + "/" + sanitize(levelLabel).replaceAll(" ", "-");
2323

2424
//console.log("Creating level " + path);
2525
fs.mkdirSync(dirRoot + "/" + path, { recursive: true });

src/NotionPage.ts

+9-6
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,15 @@ export class NotionPage {
5858
}
5959

6060
public matchesLinkId(id: string): boolean {
61-
return (
61+
const match =
6262
id === this.pageId || // from a link_to_page.pageId, which still has the dashes
63-
id === this.pageId.replace(/-/g, "")
64-
); // from inline links, which are lacking the dashes
63+
id === this.pageId.replaceAll("-", ""); // from inline links, which are lacking the dashes
64+
65+
logDebug(
66+
`matchedLinkId`,
67+
`comparing pageId:${this.pageId} to id ${id} --> ${match.toString()}`
68+
);
69+
return match;
6570
}
6671

6772
public get type(): PageType {
@@ -98,9 +103,7 @@ export class NotionPage {
98103
public get slug(): string {
99104
const explicitSlug = this.getPlainTextProperty("Slug", "");
100105
if (explicitSlug) return explicitSlug;
101-
return encodeURIComponent(
102-
this.nameOrTitle.toLowerCase().replaceAll(" ", "-")
103-
)
106+
return encodeURIComponent(this.nameOrTitle.replaceAll(" ", "-"))
104107
.replaceAll("%3A", "-")
105108
.replaceAll("--", "-");
106109
}

src/index.ts

-4
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@ program
4242
"-i, --img-output-path <string>",
4343
"Path to directory where images will be stored. If this is not included, images will be placed in the same directory as the document that uses them, which then allows for localization of screenshots."
4444
)
45-
// .option(
46-
// "-l, --internal-link-prefix <string>",
47-
// "when converting a link from one page to another, prefix the with this path instead of the default, which is rooted at the markdown-output-path."
48-
// )
4945
.option(
5046
"-p, --img-prefix-in-markdown <string>",
5147
"When referencing an image from markdown, prefix with this path instead of the full img-output-path. Should be used only in conjunction with --img-output-path."

src/links.ts

+37-14
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
import { LayoutStrategy } from "./LayoutStrategy";
2-
import { verbose, warning } from "./log";
2+
import { error, verbose, warning } from "./log";
33
import { NotionPage } from "./NotionPage";
44

55
export function convertInternalLinks(
66
markdown: string,
77
pages: NotionPage[],
88
layoutStrategy: LayoutStrategy
99
): string {
10-
//console.log(JSON.stringify(pages, null, 2));
11-
12-
return transformLinks(markdown, (url: string) => {
10+
const convertHref = (url: string) => {
1311
const p = pages.find(p => {
1412
return p.matchesLinkId(url);
1513
});
@@ -25,8 +23,29 @@ export function convertInternalLinks(
2523
);
2624

2725
return url;
28-
});
26+
};
27+
const convertLinkText = (text: string, url: string) => {
28+
// In Notion, if you just add a link to a page without linking it to any text, then in Notion
29+
// you see the name of the page as the text of the link. But when Notion gives us that same
30+
// link, it uses "link_to_page" as the text. So we have to look up the name of the page in
31+
// order to fix that.
32+
if (text !== "link_to_page") {
33+
return text;
34+
}
35+
36+
const p = pages.find(p => {
37+
return p.matchesLinkId(url);
38+
});
39+
if (p) {
40+
return p.nameOrTitle;
41+
} else {
42+
error(`Encountered a link to page ${url} but could not find that page.`);
43+
return "Problem Link";
44+
}
45+
};
46+
return transformLinks(markdown, convertHref, convertLinkText);
2947
}
48+
3049
// function convertInternalLinks(
3150
// blocks: (
3251
// | ListBlockChildrenResponse
@@ -48,27 +67,31 @@ export function convertInternalLinks(
4867
// });
4968
// }
5069

51-
function transformLinks(input: string, transform: (url: string) => string) {
70+
function transformLinks(
71+
pageMarkdown: string,
72+
convertHref: (url: string) => string,
73+
convertLinkText: (text: string, url: string) => string
74+
) {
5275
// Note: from notion (or notion-md?) we get slightly different hrefs depending on whether the link is "inline"
5376
// (has some other text that's been turned into a link) or "raw".
5477
// Raw links come in without a leading slash, e.g. [link_to_page](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
5578
// Inline links come in with a leading slash, e.g. [pointer to the introduction](/4a6de8c0b90b444b8a7bd534d6ec71a4)
5679
const linkRegExp = /\[([^\]]+)?\]\(\/?([^),^/]+)\)/g;
57-
let output = input;
80+
let output = pageMarkdown;
5881
let match;
5982

6083
// The key to understanding this while loop is that linkRegExp (a /g regex) actually has state, and
6184
// each exec() call gives you the next match. https://stackoverflow.com/a/1520853/723299
62-
verbose(`transformLinks ${input}`);
63-
while ((match = linkRegExp.exec(input)) !== null) {
85+
verbose(`transformLinks ${pageMarkdown}`);
86+
while ((match = linkRegExp.exec(pageMarkdown)) !== null) {
6487
const string = match[0];
65-
const text = match[1] || "";
66-
const url = match[2];
6788

68-
const replacement = transform(url);
89+
const hrefFromNotion = match[2];
90+
const text = convertLinkText(match[1] || "", hrefFromNotion);
91+
const hrefForDocusaurus = convertHref(hrefFromNotion);
6992

70-
if (replacement) {
71-
output = output.replace(string, `[${text}](${replacement})`);
93+
if (hrefForDocusaurus) {
94+
output = output.replace(string, `[${text}](${hrefForDocusaurus})`);
7295
} else {
7396
verbose(`Maybe problem with link ${JSON.stringify(match)}`);
7497
}

0 commit comments

Comments
 (0)