Skip to content

Commit a3177f1

Browse files
committed
step function code working with private github + hashnode (primary) + devTo
1 parent d61eae3 commit a3177f1

10 files changed

+2914
-113
lines changed

functions/identify-new-content.ts

Lines changed: 128 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,63 @@
11
import { Octokit } from "octokit";
2-
import { SFNClient, StartExecutionCommand } from '@aws-sdk/client-sfn';
2+
import { SFNClient, StartExecutionCommand } from "@aws-sdk/client-sfn";
33
import { getSecret } from "./utils/secrets";
4+
import { join } from "path";
5+
import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3";
46

57
const sfn = new SFNClient({});
8+
const s3 = new S3Client({});
9+
const blogPathDefined = !!(
10+
process.env.BLOG_PATH && process.env.BLOG_PATH !== "/"
11+
);
612
let octokit: Octokit;
713

814
export const handler = async (event: any) => {
915
try {
1016
await initializeOctokit();
1117

12-
const recentCommits = await getRecentCommits();
13-
if (recentCommits.length) {
14-
const newContent = await getNewContent(recentCommits);
15-
if (newContent.length) {
16-
const data = await getContentData(newContent);
17-
await processNewContent(data);
18+
let newContent: { fileName: string; commit: string }[] = [];
19+
if (event.body) {
20+
const body = JSON.parse(event.body);
21+
console.log(JSON.stringify({ body }, null, 2));
22+
if (body.commits) {
23+
newContent = body.commits.reduce(
24+
(
25+
p: { fileName: string; commit: string }[],
26+
commit: {
27+
id: string;
28+
added: string[];
29+
modified: string[];
30+
// ... there's more stuff here, but this is all we need
31+
}
32+
) => {
33+
const addedFiles = commit.added.filter(
34+
(addedFile: string) =>
35+
(!blogPathDefined ||
36+
addedFile.startsWith(`${process.env.BLOG_PATH}/`)) &&
37+
addedFile.endsWith(".md")
38+
);
39+
return [
40+
...p,
41+
...addedFiles.map((addedFile) => ({
42+
fileName: addedFile,
43+
commit: commit.id,
44+
})),
45+
];
46+
},
47+
[] as { fileName: string; commit: string }[]
48+
);
49+
} else {
50+
const recentCommits = await getRecentCommits();
51+
if (recentCommits.length) {
52+
newContent = await getNewContent(recentCommits);
53+
}
1854
}
1955
}
56+
if (newContent.length) {
57+
const data = await getContentData(newContent);
58+
const imagesProcessed = await saveImagesToS3(data);
59+
await processNewContent(imagesProcessed);
60+
}
2061
} catch (err) {
2162
console.error(err);
2263
}
@@ -60,10 +101,12 @@ const getNewContent = async (commits: string[]) => {
60101
ref: commits[j],
61102
});
62103

63-
const blogPath = process.env.BLOG_PATH && process.env.BLOG_PATH !== "/";
64104
const newFiles = commitDetail.data.files?.filter(
65105
(f) =>
66-
f.status == "added" && (!blogPath || f.filename.startsWith(`${process.env.BLOG_PATH}/`))
106+
f.status == "added" &&
107+
(!blogPathDefined ||
108+
f.filename.startsWith(`${process.env.BLOG_PATH}/`)) &&
109+
f.filename.endsWith(".md")
67110
);
68111
newContent.push(
69112
...(newFiles?.map((p) => {
@@ -120,8 +163,72 @@ const saveImagesToS3 = async (
120163
sendStatusEmail: boolean;
121164
}[]
122165
) => {
123-
// TODO: regex for images stored in github and fetch them / store them in a public s3 bucket
124-
}
166+
const contentData: {
167+
fileName: string;
168+
commit: string;
169+
content: string;
170+
sendStatusEmail: boolean;
171+
}[] = [];
172+
const imgRegex = /!\[(.*?)\]\((.*?)\)/g;
173+
for (let j = 0; j < newContent.length; j++) {
174+
const workingContent = { ...newContent[j] };
175+
const imageSet = new Set<string>([]);
176+
let match;
177+
while ((match = imgRegex.exec(newContent[j].content)) !== null) {
178+
imageSet.add(match[2]);
179+
}
180+
const images = [...imageSet];
181+
if (images.length === 0) {
182+
// no images in the post... passthrough
183+
contentData.push(newContent[j]);
184+
continue;
185+
}
186+
const blogFile = newContent[j].fileName;
187+
const blogSplit = `${blogFile}`.split("/");
188+
blogSplit.pop();
189+
const blogBase = blogSplit.join("/");
190+
const s3Mapping: Record<string, string> = {};
191+
for (let k = 0; k < images.length; k++) {
192+
const image = images[k];
193+
const githubPath = join(blogBase, image);
194+
const imageSplit = image.split(".");
195+
const imageExtension = imageSplit[imageSplit.length - 1];
196+
const s3Path = `${blogFile}/${k}.${imageExtension}`.replace(/\ /g, "-");
197+
const s3Url = `https://s3.amazonaws.com/${process.env.MEDIA_BUCKET}/${s3Path}`;
198+
console.log(
199+
JSON.stringify({ image, githubPath, s3Path, s3Url }, null, 2)
200+
);
201+
const postContent = await octokit.request(
202+
"GET /repos/{owner}/{repo}/contents/{path}",
203+
{
204+
owner: `${process.env.OWNER}`,
205+
repo: `${process.env.REPO}`,
206+
path: githubPath,
207+
}
208+
);
209+
210+
const buffer = Buffer.from((postContent.data as any).content, "base64");
211+
212+
// upload images to s3
213+
const putImage = new PutObjectCommand({
214+
Bucket: `${process.env.MEDIA_BUCKET}`,
215+
Key: s3Path,
216+
Body: buffer,
217+
});
218+
await s3.send(putImage);
219+
220+
s3Mapping[image] = s3Url;
221+
}
222+
const rewriteLink = (match: string, text: string, url: string) => {
223+
console.log(JSON.stringify({ match, text, url }));
224+
return `![${text}](${s3Mapping[url]})`;
225+
}
226+
workingContent.content = workingContent.content.replace(imgRegex, rewriteLink);
227+
contentData.push(workingContent);
228+
}
229+
console.log(JSON.stringify({ contentData }));
230+
return contentData;
231+
};
125232

126233
const processNewContent = async (
127234
newContent: {
@@ -131,16 +238,18 @@ const processNewContent = async (
131238
sendStatusEmail: boolean;
132239
}[]
133240
) => {
134-
const executions = await Promise.allSettled(newContent.map(async (content) => {
135-
const command = new StartExecutionCommand({
136-
stateMachineArn: process.env.STATE_MACHINE_ARN,
137-
input: JSON.stringify(content)
138-
});
139-
await sfn.send(command);
140-
}));
241+
const executions = await Promise.allSettled(
242+
newContent.map(async (content) => {
243+
const command = new StartExecutionCommand({
244+
stateMachineArn: process.env.STATE_MACHINE_ARN,
245+
input: JSON.stringify(content),
246+
});
247+
await sfn.send(command);
248+
})
249+
);
141250

142251
for (const execution of executions) {
143-
if (execution.status == 'rejected') {
252+
if (execution.status == "rejected") {
144253
console.error(execution.reason);
145254
}
146255
}

functions/parse-dev-post.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ const formatDevData = (
6565
canonical_url: process.env.AMPLIFY_BASE_URL ? `${process.env.AMPLIFY_BASE_URL}/${postDetail.data.slug.replace(
6666
/^\/|\/$/g,
6767
""
68-
)}` : ``,
68+
)}` : `${process.env.CANONICAL}`,
6969
}),
7070
description: postDetail.data.description,
7171
tags: [

functions/parse-hashnode-post.ts

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,17 @@ const formatHashnodeData = (
7171
title: postDetail.data.title,
7272
contentMarkdown: hashnodeContent,
7373
coverImageURL: postDetail.data.image,
74-
isRepublished: {
75-
...(process.env.CANONICAL === "hashnode" ? {} : {
76-
originalArticleURL: process.env.AMPLIFY_BASE_URL ? `${process.env.AMPLIFY_BASE_URL}/${postDetail.data.slug.replace(/^\/|\/$/g, "")}` : ``,
77-
}),
78-
},
74+
...(process.env.CANONICAL === "hashnode"
75+
? {}
76+
: {
77+
isRepublished: {
78+
originalArticleURL: process.env.AMPLIFY_BASE_URL
79+
? `${
80+
process.env.AMPLIFY_BASE_URL
81+
}/${postDetail.data.slug.replace(/^\/|\/$/g, "")}`
82+
: `${process.env.CANONICAL}`,
83+
},
84+
}),
7985
tags: [],
8086
subtitle: postDetail.data.description,
8187
},

functions/parse-medium-post.ts

Lines changed: 62 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,34 +2,69 @@ import { getLinks } from "./utils/getLinks";
22
import { getTweets } from "./utils/getTweets";
33
import { getTweetUrl } from "./utils/getTweetUrl";
44

5-
const frontmatter = require('@github-docs/frontmatter');
5+
const frontmatter = require("@github-docs/frontmatter");
66

7-
export const handler = async (state: { post: any; format: string; articleCatalog: any; canonical?: string; }) => {
7+
export const handler = async (state: {
8+
post: any;
9+
format: string;
10+
articleCatalog: any;
11+
canonical?: string;
12+
}) => {
813
const details = frontmatter(state.post);
914
const links = getLinks(details.content);
1015
const tweets = getTweets(details.content);
1116

12-
const payload = formatMediumData(details, state.articleCatalog, links, tweets);
13-
17+
const payload = formatMediumData(
18+
details,
19+
state.articleCatalog,
20+
links,
21+
tweets
22+
);
23+
1424
return {
1525
payload,
16-
url: `/${details.data.slug.replace(/^\/|\/$/g, '')}`
26+
url: `/${details.data.slug.replace(/^\/|\/$/g, "")}`,
1727
};
1828
};
1929

20-
const formatMediumData = (postDetail: { data: { title: any; description: any; image_attribution: any; image: any; categories: any; tags: any; slug: string; }; content: string | any[]; }, articleCatalog: any[], links: any, tweets: any) => {
21-
let mediumContent = `\n# ${postDetail.data.title}\n`
22-
+ `#### ${postDetail.data.description}\n`
23-
+ `![${postDetail.data.image_attribution ?? ''}](${postDetail.data.image})\n`
24-
+ `${postDetail.content.slice(0)}`;
30+
const formatMediumData = (
31+
postDetail: {
32+
data: {
33+
title: any;
34+
description: any;
35+
image_attribution: any;
36+
image: any;
37+
categories: any;
38+
tags: any;
39+
slug: string;
40+
};
41+
content: string | any[];
42+
},
43+
articleCatalog: any[],
44+
links: any,
45+
tweets: any
46+
) => {
47+
let mediumContent =
48+
`\n# ${postDetail.data.title}\n` +
49+
`#### ${postDetail.data.description}\n` +
50+
`![${postDetail.data.image_attribution ?? ""}](${
51+
postDetail.data.image
52+
})\n` +
53+
`${postDetail.content.slice(0)}`;
2554

2655
for (const link of links) {
27-
const replacement = articleCatalog.find(c => c.links.M.url.S == link[1]);
56+
const replacement = articleCatalog.find((c) => c.links.M.url.S == link[1]);
2857
if (replacement) {
2958
if (replacement.links.M.mediumUrl && replacement.links.M.mediumUrl.S) {
30-
mediumContent = mediumContent.replace(link[1], replacement.links.M.mediumUrl.S);
59+
mediumContent = mediumContent.replace(
60+
link[1],
61+
replacement.links.M.mediumUrl.S
62+
);
3163
} else {
32-
mediumContent = mediumContent.replace(link[1], `${process.env.AMPLIFY_BASE_URL}${replacement.links.M.url.S}`);
64+
mediumContent = mediumContent.replace(
65+
link[1],
66+
`${process.env.AMPLIFY_BASE_URL}${replacement.links.M.url.S}`
67+
);
3368
}
3469
}
3570
}
@@ -41,18 +76,22 @@ const formatMediumData = (postDetail: { data: { title: any; description: any; im
4176

4277
const mediumData = {
4378
title: postDetail.data.title,
44-
contentFormat: 'markdown',
79+
contentFormat: "markdown",
4580
tags: [...postDetail.data.categories, ...postDetail.data.tags],
46-
...(process.env.CANONICAL === "medium" ? {} : {
47-
canonical_url: process.env.AMPLIFY_BASE_URL ? `${process.env.AMPLIFY_BASE_URL}/${postDetail.data.slug.replace(
48-
/^\/|\/$/g,
49-
""
50-
)}` : ``,
51-
}),
52-
publishStatus: 'draft',
81+
...(process.env.CANONICAL === "medium"
82+
? {}
83+
: {
84+
canonical_url: process.env.AMPLIFY_BASE_URL
85+
? `${process.env.AMPLIFY_BASE_URL}/${postDetail.data.slug.replace(
86+
/^\/|\/$/g,
87+
""
88+
)}`
89+
: `${process.env.CANONICAL}`,
90+
}),
91+
publishStatus: "draft",
5392
notifyFollowers: true,
54-
content: mediumContent
93+
content: mediumContent,
5594
};
5695

5796
return mediumData;
58-
};
97+
};

functions/send-api-request.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export const handler = async (state: { secretKey: any; request: { method: any; b
2020
}
2121
};
2222
} else {
23+
console.log(JSON.stringify({ config, state }, null, 2));
2324
const response = await axios.request(config);
2425
return response.data;
2526
}

lib/blog-crossposting-automation-stack.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
import { StackProps, Stack, CfnOutput, Duration } from "aws-cdk-lib";
1+
import { StackProps, Stack, CfnOutput, Duration, RemovalPolicy } from "aws-cdk-lib";
22
import { EventBus, Rule } from "aws-cdk-lib/aws-events";
33
import {
44
LambdaFunction,
55
} from "aws-cdk-lib/aws-events-targets";
66
import { Architecture, FunctionUrlAuthType, Runtime } from "aws-cdk-lib/aws-lambda";
77
import { NodejsFunction, NodejsFunctionProps } from "aws-cdk-lib/aws-lambda-nodejs";
8+
import { Bucket } from "aws-cdk-lib/aws-s3";
89
import { Secret } from "aws-cdk-lib/aws-secretsmanager";
910
import { Construct } from "constructs";
1011
import { join } from "path";
@@ -19,7 +20,6 @@ export interface BlogCrosspostingAutomationStackProps extends StackProps {
1920
amplifyProjectId: string;
2021
blogBaseUrl: string;
2122
};
22-
// TODO: properly handle canonical urls for non-amplify blogs
2323
canonical: "dev" | "medium" | "hashnode" | "amplify";
2424
commitTimeToleranceMinutes?: number;
2525
devTo?: {
@@ -163,6 +163,12 @@ export class BlogCrosspostingAutomationStack extends Stack {
163163
});
164164
table.grantWriteData(loadCrossPostsFn);
165165

166+
const mediaBucket = new Bucket(this, `BlogPostMediaBucket`, {
167+
autoDeleteObjects: true,
168+
publicReadAccess: true,
169+
removalPolicy: RemovalPolicy.DESTROY,
170+
});
171+
166172
const identifyNewContentFn = new NodejsFunction(
167173
this,
168174
`IdentifyNewContentFn`,
@@ -174,6 +180,8 @@ export class BlogCrosspostingAutomationStack extends Stack {
174180
identifyNewContentFn.addEnvironment("OWNER", github.owner);
175181
identifyNewContentFn.addEnvironment("REPO", github.repo);
176182
identifyNewContentFn.addEnvironment("BLOG_PATH", github.path);
183+
identifyNewContentFn.addEnvironment("MEDIA_BUCKET", mediaBucket.bucketName);
184+
mediaBucket.grantReadWrite(identifyNewContentFn);
177185
if (commitTimeToleranceMinutes) {
178186
identifyNewContentFn.addEnvironment(
179187
"COMMIT_TIME_TOLERANCE_MINUTES",
@@ -228,6 +236,7 @@ export class BlogCrosspostingAutomationStack extends Stack {
228236

229237
const { stateMachine } = new CrossPostStepFunction(this, `CrossPostStepFn`, crossPostStepFunctionProps);
230238
stateMachine.grantStartExecution(identifyNewContentFn);
239+
identifyNewContentFn.addEnvironment("STATE_MACHINE_ARN", stateMachine.stateMachineArn);
231240
table.grantReadWriteData(stateMachine);
232241
eventBus.grantPutEventsTo(stateMachine);
233242
}

0 commit comments

Comments
 (0)