Skip to content

Commit 3cf829c

Browse files
committed
feat: add automation scripts for SEO audit and LLM index generation
- Add seo-audit.js: Scan markdown files for frontmatter SEO issues - Add llms-txt.js: Generate LLM-friendly site index (llms.txt) - Add reading-sidebar.js: Auto-generate docsify reading sidebar - Add shared config and markdown utilities These scripts improve developer workflow by automating repetitive tasks like sidebar maintenance and SEO optimization. 添加自动化脚本: - SEO 审计脚本:扫描 Markdown 文件的 frontmatter SEO 问题 - LLM 索引生成:为 AI 工具生成友好的站点索引 - 读书笔记侧边栏:自动生成 docsify 侧边栏配置 - 共享配置和工具函数
1 parent 20a9c82 commit 3cf829c

6 files changed

Lines changed: 613 additions & 0 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,8 @@
1111
# 自定义草稿箱
1212
src/_temp/
1313

14+
# Audit report
15+
audit-report.json
16+
1417
# GoodSync
1518
_gsdata_/

scripts/config.js

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* Shared configuration for all scripts.
3+
* Edit this file to adapt to your project.
4+
*/
5+
6+
import fs from "fs";
7+
import path from "path";
8+
import { fileURLToPath } from "url";
9+
10+
const __filename = fileURLToPath(import.meta.url);
11+
const __dirname = path.dirname(__filename);
12+
13+
const docsDir = path.join(__dirname, "../src");
14+
const themeFile = path.join(docsDir, ".vuepress/theme.ts");
15+
const vuepressConfigFile = path.join(docsDir, ".vuepress/config.ts");
16+
17+
/**
 * Auto-detect site info from the VuePress config files.
 *
 * Extracts `hostname` from the theme file and `title` / `description` from the
 * VuePress config file by regex-matching the raw source text (the files are
 * never evaluated). Detection is best-effort: a missing file is skipped
 * quietly, while any other read failure is reported as a warning instead of
 * being silently discarded, so an unexpectedly empty result can be diagnosed.
 *
 * @returns {{ baseUrl: string, title: string, description: string }}
 *   Detected values; a field stays "" when it could not be detected.
 *   `baseUrl` is replaced by the BASE_URL environment variable when it is set.
 */
function readSiteConfig() {
  const site = { baseUrl: "", title: "", description: "" };

  // Returns the file's text, or null when it is missing or unreadable.
  const readIfPresent = (file) => {
    try {
      return fs.readFileSync(file, "utf-8");
    } catch (err) {
      // A missing config file is an expected situation; anything else is worth surfacing.
      if (err.code !== "ENOENT") {
        console.warn(`⚠️ Could not read ${file}: ${err.message}`);
      }
      return null;
    }
  };

  const themeSource = readIfPresent(themeFile);
  if (themeSource !== null) {
    const match = themeSource.match(/\bhostname\s*:\s*["']([^"']+)["']/);
    if (match) site.baseUrl = match[1];
  }

  const configSource = readIfPresent(vuepressConfigFile);
  if (configSource !== null) {
    const titleMatch = configSource.match(/\btitle\s*:\s*["']([^"']+)["']/);
    const descMatch = configSource.match(/\bdescription\s*:\s*["']([^"']+)["']/);
    if (titleMatch) site.title = titleMatch[1].trim();
    if (descMatch) site.description = descMatch[1].trim();
  }

  // Environment variable override
  if (process.env.BASE_URL) site.baseUrl = process.env.BASE_URL;

  return site;
}
46+
47+
const site = readSiteConfig();
48+
49+
if (!fs.existsSync(docsDir)) {
50+
console.error(`❌ Docs directory not found: ${docsDir}`);
51+
console.error(" Please check the docsDir path in scripts/config.js");
52+
process.exit(1);
53+
}
54+
55+
export default {
56+
// Common
57+
docsDir,
58+
59+
// Auto-detected from .vuepress/theme.ts and .vuepress/config.ts
60+
site,
61+
62+
// llms-txt.js
63+
llms: {
64+
outputFile: path.join(docsDir, ".vuepress/dist/llms.txt"),
65+
excludedDirs: ["node_modules", ".vuepress", "_temp"],
66+
},
67+
68+
// seo-audit.js
69+
audit: {
70+
excludedDirs: ["node_modules", ".vuepress", "_temp", "reading"],
71+
outputFile: path.join(__dirname, "../audit-report.json"),
72+
seoRules: {
73+
description: {
74+
minLength: 120,
75+
maxLength: 160,
76+
avoidPhrases: ["本文介绍了", "本文讲述了", "本文分享了", "本篇文章"],
77+
},
78+
title: {
79+
maxLength: 60,
80+
},
81+
shortTitle: {
82+
requiredWhenTitleLong: 50,
83+
},
84+
},
85+
},
86+
87+
// reading-sidebar.js
88+
reading: {
89+
dir: path.join(docsDir, "reading"),
90+
sidebarFile: "_sidebar.md",
91+
ignoreFiles: ["README.md", "_sidebar.md", "_navbar.md", "index.html", ".nojekyll"],
92+
homeEntry: "- [读书方法](README.md)",
93+
// Category directories must match this pattern (e.g. "0_效率与习惯")
94+
categoryPattern: /^\d+_/,
95+
// How to transform directory name for display (e.g. "0_效率与习惯" -> "0.效率与习惯")
96+
categoryNameReplace: [/^(\d+)_/, "$1."],
97+
// Pinned name fragments: files whose name contains one of these are shown first in their category
98+
pinnedFiles: ["原则"],
99+
},
100+
};

scripts/lib/markdown-utils.js

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import fs from "fs";
2+
import path from "path";
3+
import matter from "gray-matter";
4+
5+
/**
 * Recursively collect all Markdown files under a directory.
 *
 * Directories whose name appears in `excludedDirs` are not descended into
 * (the match is on the bare directory name at any depth). Entries are
 * stat-ed through symlinks; an entry whose target no longer exists
 * (a dangling symlink) is skipped instead of aborting the whole scan.
 *
 * @param {string} dir - Directory to start scanning from.
 * @param {string[]} [excludedDirs=[]] - Directory names to skip.
 * @returns {string[]} Paths of every file whose name ends with ".md".
 */
export function getAllMarkdownFiles(dir, excludedDirs = []) {
  const fileList = [];

  const scan = (currentDir) => {
    for (const entry of fs.readdirSync(currentDir)) {
      const fullPath = path.join(currentDir, entry);

      // With throwIfNoEntry: false a missing target yields undefined rather than an ENOENT throw.
      const stats = fs.statSync(fullPath, { throwIfNoEntry: false });
      if (!stats) continue;

      if (stats.isDirectory()) {
        if (!excludedDirs.includes(entry)) scan(fullPath);
      } else if (entry.endsWith(".md")) {
        fileList.push(fullPath);
      }
    }
  };

  scan(dir);
  return fileList;
}
25+
26+
/**
 * Extract the frontmatter of a Markdown document using gray-matter.
 *
 * @param {string} content - Raw Markdown text.
 * @returns {object|null} The parsed frontmatter object, or null when the
 *   frontmatter has no keys or parsing throws.
 */
export function parseFrontmatter(content) {
  try {
    const parsed = matter(content);
    const fields = parsed.data;
    if (Object.keys(fields).length === 0) return null;
    return fields;
  } catch {
    return null;
  }
}
37+
38+
/**
 * Approximate display width of a value's string form.
 *
 * Each code point above 127 counts as 2 columns, everything else as 1.
 * The string is walked by code point, so a surrogate pair (e.g. an emoji)
 * is counted once rather than twice.
 *
 * @param {*} str - Value to measure; non-strings are converted with String().
 * @returns {number} The computed width; 0 for null or undefined.
 */
export function getCharLength(str) {
  if (str == null) return 0;
  const text = typeof str === "string" ? str : String(str);
  return Array.from(text).reduce(
    (width, symbol) => width + (symbol.codePointAt(0) > 127 ? 2 : 1),
    0,
  );
}

scripts/llms-txt.js

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import fs from "fs";
2+
import path from "path";
3+
import matter from "gray-matter";
4+
import { getAllMarkdownFiles } from "./lib/markdown-utils.js";
5+
import config from "./config.js";
6+
7+
const { baseUrl: BASE_URL, title: SITE_TITLE, description: SITE_DESC } = config.site;
8+
9+
if (!BASE_URL) console.warn("⚠️ No baseUrl detected. Check .vuepress/theme.ts or set BASE_URL env var.");
10+
if (!SITE_TITLE) console.warn("⚠️ No site title detected. Check .vuepress/config.ts");
11+
12+
/**
 * Build the public URL for a Markdown source file.
 *
 * - Files under `reading/` map to a hash route: `<base>/reading/#/<path without .md>`.
 * - `README.md` / `index.md` map to their directory URL (`<base>/dir/`).
 * - Every other `.md` file maps to the same path with an `.html` extension.
 *
 * Only files literally named `README.md` or `index.md` are treated as
 * directory indexes; a name that merely ends with those characters
 * (e.g. `my-index.md`) becomes `my-index.html`.
 *
 * @param {string} filePath - Path of a file located under `config.docsDir`.
 * @returns {string} The URL, prefixed with the configured base URL.
 */
function generateUrl(filePath) {
  // Drop trailing slashes so a base like "https://example.com/" does not produce "//".
  const base = BASE_URL.replace(/\/+$/, "");
  const relativePath = path.relative(config.docsDir, filePath);

  // Handle Docsify reading notes
  if (relativePath.startsWith("reading" + path.sep)) {
    const docsifyPath = relativePath
      .replace(/^reading[\/\\]/, "")
      .replace(/\\/g, "/")
      .replace(/\.md$/, "");
    return `${base}/reading/#/${docsifyPath}`;
  }

  // Handle VuePress pages
  let urlPath = relativePath.replace(/\\/g, "/");

  // Anchored to a path-segment boundary so "my-index.md" is not mistaken for an index page.
  const indexFile = /(^|\/)(README|index)\.md$/;
  if (indexFile.test(urlPath)) {
    urlPath = urlPath.replace(indexFile, "$1");
  } else {
    urlPath = urlPath.replace(/\.md$/, ".html");
  }

  // Anything that is neither a page nor already a directory URL is treated as a directory.
  if (urlPath && !urlPath.endsWith(".html") && !urlPath.endsWith("/")) {
    urlPath += "/";
  }

  return `${base}/${urlPath}`;
}
42+
43+
/**
 * Normalize a description value into a single line of plain text.
 *
 * - null / undefined become "".
 * - Arrays are joined with spaces after dropping falsy items.
 * - Numbers and booleans are stringified; any other non-string becomes "".
 * - Markdown links `[text](url)` and images `![alt](src)` are reduced to
 *   their text / alt text (previously an image left a stray "!" behind).
 * - Line breaks are collapsed to a single space and the result is trimmed.
 *
 * @param {*} text - Raw description, typically taken from frontmatter or body text.
 * @returns {string} The cleaned description.
 */
function cleanDescription(text) {
  if (text == null) return "";

  let value = text;
  if (Array.isArray(value)) value = value.filter(Boolean).join(" ");
  if (typeof value !== "string") {
    if (typeof value === "number" || typeof value === "boolean") value = String(value);
    else return "";
  }

  return value
    // The optional leading "!" makes image syntax collapse to its alt text as well.
    .replace(/!?\[([^\]]+)\]\([^\)]+\)/g, "$1")
    .replace(/[\r\n]+/g, " ")
    .trim();
}
58+
59+
/**
 * Return the first prose line of a Markdown body, cleaned and capped at 200
 * characters (longer text is cut to 197 characters plus "..."), or "" when
 * the body has no such line.
 *
 * Headings, blockquotes, images, HTML lines and blank lines are skipped.
 * Everything between a pair of code-fence lines is skipped as well, so code
 * inside a fenced block is never mistaken for the page description.
 */
function findFirstParagraph(body) {
  let inFence = false;

  for (const line of body.split("\n")) {
    const trimmed = line.trim();

    // A fence line toggles "inside code block"; the fence itself is never content.
    if (trimmed.startsWith("```") || trimmed.startsWith("~~~")) {
      inFence = !inFence;
      continue;
    }
    if (inFence || !trimmed) continue;
    if (trimmed.startsWith("#") || trimmed.startsWith(">") || trimmed.startsWith("![") || trimmed.startsWith("<")) continue;

    const text = cleanDescription(trimmed);
    return text.length > 200 ? text.substring(0, 197) + "..." : text;
  }

  return "";
}

/**
 * Main function: scan the docs directory and write the llms.txt index.
 *
 * For every Markdown file the title comes from frontmatter `title`, falling
 * back to the first H1 in the body; files without any title are left out.
 * The description comes from frontmatter `description`, falling back to the
 * first prose line of the body. Pages are sorted by URL and written as a
 * Markdown list to `config.llms.outputFile`. A file that fails to process is
 * reported with a warning and skipped.
 */
function main() {
  console.log("🔍 Scanning for Markdown files...");
  console.log(`🌐 Base URL: ${BASE_URL}`);
  const files = getAllMarkdownFiles(config.docsDir, config.llms.excludedDirs);
  console.log(`📄 Found ${files.length} files.`);

  const pages = [];

  for (const filePath of files) {
    try {
      const raw = fs.readFileSync(filePath, "utf-8");
      const { data, content: body } = matter(raw);

      let title = data?.title ? String(data.title) : null;

      // Fallback if no title in frontmatter
      if (!title) {
        const h1Match = body.match(/^#\s+(.+)$/m);
        if (h1Match) title = h1Match[1].trim();
      }

      // Pages without any title cannot be listed.
      if (!title) continue;

      let description = data?.description ? cleanDescription(data.description) : "";

      // Fallback: extract first content paragraph from body
      if (!description) description = findFirstParagraph(body);

      pages.push({
        title,
        description,
        url: generateUrl(filePath),
      });
    } catch (e) {
      console.warn(`⚠️ Error processing ${filePath}: ${e.message}`);
    }
  }

  pages.sort((a, b) => a.url.localeCompare(b.url));

  let content = `# ${SITE_TITLE}\n\n`;
  // Skip the summary blockquote entirely rather than emitting an empty "> " line.
  if (SITE_DESC) content += `> ${SITE_DESC}\n\n`;
  content += `## Pages\n\n`;

  for (const page of pages) {
    content += `- [${page.title}](${page.url})`;
    if (page.description) {
      content += ` - ${page.description}`;
    }
    content += `\n`;
  }

  // recursive: true makes this a no-op when the directory already exists.
  fs.mkdirSync(path.dirname(config.llms.outputFile), { recursive: true });

  fs.writeFileSync(config.llms.outputFile, content, "utf-8");
  console.log(`✅ Generated ${config.llms.outputFile} with ${pages.length} pages.`);
}
135+
136+
main();

scripts/reading-sidebar.js

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/**
2+
* Auto-generate _sidebar.md for Docsify reading notes
3+
* Run: node scripts/reading-sidebar.js
4+
*/
5+
6+
import fs from "fs";
7+
import path from "path";
8+
import config from "./config.js";
9+
10+
// `pinnedFiles` must default to an array: the file sort in buildSidebar() calls `.some()` on it,
// which would throw a TypeError on an object default when the config key is absent.
const { dir: READING_DIR, sidebarFile, ignoreFiles, homeEntry, categoryPattern, categoryNameReplace, pinnedFiles = [] } = config.reading;
const SIDEBAR_PATH = path.join(READING_DIR, sidebarFile);
12+
13+
/**
 * Determine the display title of a Markdown file.
 *
 * Uses the text of the first H1 heading (`# ...`) found in the file. When the
 * file has no H1, or cannot be read, falls back to the file name without its
 * `.md` extension, with underscores turned into spaces.
 *
 * @param {string} filePath - Path of the Markdown file.
 * @returns {string} The title to display.
 */
function getTitle(filePath) {
  let heading = null;

  try {
    heading = /^#\s+(.+)$/m.exec(fs.readFileSync(filePath, "utf-8"));
  } catch {
    // Unreadable file: fall through to the file-name based title.
  }

  if (heading !== null) return heading[1].trim();

  const baseName = path.basename(filePath, ".md");
  return baseName.replace(/_/g, " ");
}
26+
27+
/**
 * Build the text of the Docsify sidebar from the reading directory.
 *
 * The output starts with the configured home entry. Each sub-directory whose
 * name matches `categoryPattern` then contributes a category line followed by
 * one link line per Markdown file (files listed in `ignoreFiles` are left
 * out). Categories are ordered by their leading number; inside a category,
 * files whose name contains a pinned fragment come first and the rest follow
 * in zh-CN locale order. Categories without any listed file are omitted.
 *
 * @returns {string} Sidebar Markdown, one entry per line, ending with a newline.
 */
function buildSidebar() {
  const leadingNumber = (name) => Number.parseInt(name.match(/^(\d+)/)[1], 10);
  const isPinned = (name) => pinnedFiles.some((k) => name.includes(k));
  const isCategoryDir = (name) =>
    fs.statSync(path.join(READING_DIR, name)).isDirectory() && categoryPattern.test(name);

  const [pattern, replacement] = categoryNameReplace;
  const output = [homeEntry];

  const categories = fs.readdirSync(READING_DIR).filter(isCategoryDir);
  categories.sort((left, right) => leadingNumber(left) - leadingNumber(right));

  categories.forEach((category) => {
    const categoryPath = path.join(READING_DIR, category);

    const notes = fs
      .readdirSync(categoryPath)
      .filter((name) => name.endsWith(".md") && !ignoreFiles.includes(name));

    // Pinned files first, everything else in locale order.
    notes.sort((a, b) => {
      const pa = isPinned(a);
      const pb = isPinned(b);
      if (pa !== pb) return pa ? -1 : 1;
      return a.localeCompare(b, "zh-CN");
    });

    if (notes.length === 0) return;

    output.push(`- ${category.replace(pattern, replacement)}`);
    notes.forEach((file) => {
      const title = getTitle(path.join(categoryPath, file));
      output.push(` - [${title}](${category}/${file})`);
    });
  });

  return output.join("\n") + "\n";
}
76+
77+
// Main
78+
if (!fs.existsSync(READING_DIR)) {
79+
console.error(`❌ Reading directory not found: ${READING_DIR}`);
80+
process.exit(1);
81+
}
82+
83+
const sidebar = buildSidebar();
84+
fs.writeFileSync(SIDEBAR_PATH, sidebar, "utf-8");
85+
console.log("✅ Generated _sidebar.md with", sidebar.split("\n").length - 1, "entries");

0 commit comments

Comments
 (0)