Skip to content

Commit c3dc8ff

Browse files
committed
fix: process images that are nested inside of columns
1 parent fe2d623 commit c3dc8ff

File tree

4 files changed

+95
-66
lines changed

4 files changed

+95
-66
lines changed

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
"notion-download": "node dist/index.js",
1515
"cmdhelp": "ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts",
1616
"// test out with a private sample notion db": "",
17-
"pull-test": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
17+
"pull-test-tagged": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug --status-tag test",
18+
"pull-test-outline": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
1819
"// test with a semi-stable/public site:": "",
1920
"pull-sample": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --locales en,es,fr,de --log-level verbose",
2021
"pull-sample-with-paths": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --img-output-path ./sample_img"

src/CustomTranformers.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ export function setupCustomTransformers(
2121
(block: ListBlockChildrenResponseResult) =>
2222
notionColumnToMarkdown(notionToMarkdown, notionClient, block)
2323
);
24+
25+
// Note: Pull.ts also adds an image transformer, but has to do that for each
26+
// page so we don't do it here.
2427
}
2528

2629
async function notionColumnListToMarkdown(
@@ -45,7 +48,6 @@ async function notionColumnListToMarkdown(
4548

4649
return `<div class='notion-row'>\n${columns.join("\n\n")}\n</div>`;
4750
}
48-
4951
async function notionColumnToMarkdown(
5052
notionToMarkdown: NotionToMarkdown,
5153
notionClient: Client,

src/images.ts

Lines changed: 69 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@ import FileType, { FileTypeResult } from "file-type";
33
import fetch from "node-fetch";
44
import * as Path from "path";
55
import { makeImagePersistencePlan } from "./MakeImagePersistencePlan";
6-
import { logDebug, verbose, info } from "./log";
7-
import { ListBlockChildrenResponse } from "@notionhq/client/build/src/api-endpoints";
6+
import { warning, logDebug, verbose, info } from "./log";
7+
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
8+
9+
// We do several things here:
10+
// 1) copy images locally instead of leaving them in Notion
11+
// 2) change the links to point here
12+
// 3) read the caption and if there are localized images, get those too
13+
// 4) prepare for localized documents, which need a copy of every image
814

915
let existingImagesNotSeenYetInPull: string[] = [];
1016
let imageOutputPath = ""; // default to putting in the same directory as the document referring to it.
@@ -59,22 +65,65 @@ export async function initImageHandling(
5965
}
6066
}
6167

62-
export async function outputImages(
63-
blocks: (
64-
| ListBlockChildrenResponse
65-
| /* not avail in types: BlockObjectResponse so we use any*/ any
66-
)[],
68+
// This is a "custom transformer" function passed to notion-to-md
69+
// eslint-disable-next-line @typescript-eslint/require-await
70+
export async function markdownToMDImageTransformer(
71+
block: ListBlockChildrenResponseResult,
6772
fullPathToDirectoryContainingMarkdown: string,
6873
relativePathToThisPage: string
74+
): Promise<string> {
75+
const image = (block as any).image;
76+
77+
await processImageBlock(
78+
image,
79+
fullPathToDirectoryContainingMarkdown,
80+
relativePathToThisPage
81+
);
82+
83+
// just concatenate the caption text parts together
84+
const altText: string = image.caption
85+
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
86+
.map((item: any) => item.plain_text)
87+
.join("");
88+
89+
const href: string =
90+
image.type === "external" ? image.external.url : image.file.url;
91+
return `![${altText}](${href})`;
92+
}
93+
94+
async function processImageBlock(
95+
imageBlock: any,
96+
pathToParentDocument: string,
97+
relativePathToThisPage: string
6998
): Promise<void> {
70-
for (const b of blocks) {
71-
if ("image" in b) {
72-
await processImageBlock(
73-
b,
74-
fullPathToDirectoryContainingMarkdown,
75-
relativePathToThisPage
76-
);
77-
}
99+
logDebug("processImageBlock", JSON.stringify(imageBlock));
100+
101+
// this is broken into all these steps to facilitate unit testing without IO
102+
const imageSet = parseImageBlock(imageBlock);
103+
imageSet.pathToParentDocument = pathToParentDocument;
104+
imageSet.relativePathToParentDocument = relativePathToThisPage;
105+
106+
await readPrimaryImage(imageSet);
107+
makeImagePersistencePlan(imageSet, imageOutputPath, imagePrefix);
108+
await saveImage(imageSet);
109+
110+
// change the src to point to our copy of the image
111+
if ("file" in imageBlock) {
112+
imageBlock.file.url = imageSet.filePathToUseInMarkdown;
113+
} else {
114+
imageBlock.external.url = imageSet.filePathToUseInMarkdown;
115+
}
116+
// put back the simplified caption, stripped of the meta information
117+
if (imageSet.caption) {
118+
imageBlock.caption = [
119+
{
120+
type: "text",
121+
text: { content: imageSet.caption, link: null },
122+
plain_text: imageSet.caption,
123+
},
124+
];
125+
} else {
126+
imageBlock.caption = [];
78127
}
79128
}
80129

@@ -127,20 +176,20 @@ function writeImageIfNew(path: string, buffer: Buffer) {
127176
fs.createWriteStream(path).write(buffer); // async but we're not waiting
128177
}
129178

130-
export function parseImageBlock(b: any): ImageSet {
179+
export function parseImageBlock(image: any): ImageSet {
131180
const imageSet: ImageSet = {
132181
primaryUrl: "",
133182
caption: "",
134183
localizedUrls: locales.map(l => ({ iso632Code: l, url: "" })),
135184
};
136185

137-
if ("file" in b.image) {
138-
imageSet.primaryUrl = b.image.file.url; // image saved on notion (actually AWS)
186+
if ("file" in image) {
187+
imageSet.primaryUrl = image.file.url; // image saved on notion (actually AWS)
139188
} else {
140-
imageSet.primaryUrl = b.image.external.url; // image still pointing somewhere else. I've see this happen when copying a Google Doc into Notion. Notion kep pointing at the google doc.
189+
imageSet.primaryUrl = image.external.url; // image still pointing somewhere else. I've seen this happen when copying a Google Doc into Notion. Notion kept pointing at the Google Doc.
141190
}
142191

143-
const mergedCaption: string = b.image.caption
192+
const mergedCaption: string = image.caption
144193
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
145194
.map((c: any) => c.plain_text)
146195
.join("");
@@ -169,44 +218,6 @@ export function parseImageBlock(b: any): ImageSet {
169218
return imageSet;
170219
}
171220

172-
// Download the image if we don't have it, give it a good name, and
173-
// change the src to point to our copy of the image.
174-
async function processImageBlock(
175-
b: any,
176-
pathToParentDocument: string,
177-
relativePathToThisPage: string
178-
): Promise<void> {
179-
logDebug("processImageBlock", JSON.stringify(b));
180-
181-
// this is broken into all these steps to facilitate unit testing without IO
182-
const imageSet = parseImageBlock(b);
183-
imageSet.pathToParentDocument = pathToParentDocument;
184-
imageSet.relativePathToParentDocument = relativePathToThisPage;
185-
186-
await readPrimaryImage(imageSet);
187-
makeImagePersistencePlan(imageSet, imageOutputPath, imagePrefix);
188-
await saveImage(imageSet);
189-
190-
// change the src to point to our copy of the image
191-
if ("file" in b.image) {
192-
b.image.file.url = imageSet.filePathToUseInMarkdown;
193-
} else {
194-
b.image.external.url = imageSet.filePathToUseInMarkdown;
195-
}
196-
// put back the simplified caption, stripped of the meta information
197-
if (imageSet.caption) {
198-
b.image.caption = [
199-
{
200-
type: "text",
201-
text: { content: imageSet.caption, link: null },
202-
plain_text: imageSet.caption,
203-
},
204-
];
205-
} else {
206-
b.image.caption = [];
207-
}
208-
}
209-
210221
function imageWasSeen(path: string) {
211222
existingImagesNotSeenYetInPull = existingImagesNotSeenYetInPull.filter(
212223
p => p !== path

src/pull.ts

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@ import { NotionToMarkdown } from "notion-to-md";
44
import { HierarchicalNamedLayoutStrategy } from "./HierarchicalNamedLayoutStrategy";
55
import { LayoutStrategy } from "./LayoutStrategy";
66
import { initNotionClient, NotionPage, PageType } from "./NotionPage";
7-
import { initImageHandling, cleanupOldImages, outputImages } from "./images";
7+
import {
8+
initImageHandling,
9+
cleanupOldImages,
10+
markdownToMDImageTransformer,
11+
} from "./images";
812

913
import { tweakForDocusaurus } from "./DocusaurusTweaks";
1014
import { setupCustomTransformers } from "./CustomTranformers";
1115
import * as Path from "path";
1216
import { error, info, logDebug, verbose, warning } from "./log";
1317
import { convertInternalLinks } from "./links";
18+
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
1419

1520
export type Options = {
1621
notionToken: string;
@@ -164,14 +169,21 @@ async function outputPage(page: NotionPage) {
164169
const relativePathToFolderContainingPage = Path.dirname(
165170
layoutStrategy.getLinkPathForPage(page)
166171
);
167-
await outputImages(
168-
blocks,
169-
directoryContainingMarkdown,
170-
relativePathToFolderContainingPage
171-
);
172+
logDebug("pull", JSON.stringify(blocks));
172173

173174
currentSidebarPosition++;
174175

176+
// we have to set this one up for each page because we need to
177+
// give it two extra parameters that are context for each page
178+
notionToMarkdown.setCustomTransformer(
179+
"image",
180+
(block: ListBlockChildrenResponseResult) =>
181+
markdownToMDImageTransformer(
182+
block,
183+
directoryContainingMarkdown,
184+
relativePathToFolderContainingPage
185+
)
186+
);
175187
const mdBlocks = await notionToMarkdown.blocksToMarkdown(blocks);
176188

177189
// if (page.nameOrTitle.startsWith("Embed")) {
@@ -187,8 +199,11 @@ async function outputPage(page: NotionPage) {
187199
frontmatter += "---\n";
188200

189201
let markdown = notionToMarkdown.toMarkdownString(mdBlocks);
202+
203+
// Improve: maybe this could be another notion-to-md "custom transformer"
190204
markdown = convertInternalLinks(markdown, pages, layoutStrategy);
191205

206+
// Improve: maybe this could be another notion-to-md "custom transformer"
192207
const { body, imports } = tweakForDocusaurus(markdown);
193208
const output = `${frontmatter}\n${imports}\n${body}`;
194209

0 commit comments

Comments
 (0)