Skip to content

Commit 25916c3

Browse files
committed
Improve prompt of AI mdx-to-md converter
1 parent 6428f31 commit 25916c3

4 files changed

Lines changed: 116 additions & 25 deletions

File tree

mdx-to-md-converter/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
node_modules
22
dist
3+
.env
4+
.hash-cache.json

mdx-to-md-converter/src/index.ts

Lines changed: 50 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { convertStep } from './modules/convertStep';
1818
import { removeHrAndLayout } from './modules/removeHrAndLayout';
1919
import { removeIndentations } from './modules/removeIndentations';
2020
import { overviewByAI } from './modules/overviewByAI';
21+
import { loadHashCache, saveHashCache, hasFileChanged, updateFileHash } from './modules/hashCache';
2122

2223
import fs from 'fs';
2324
import path from 'path';
@@ -125,21 +126,50 @@ async function main() {
125126
fs.mkdirSync(outputDir, { recursive: true });
126127
}
127128

128-
// Array to store all the generated links
129+
const hashCache = loadHashCache();
130+
console.log('Loaded hash cache');
131+
129132
const allLinks: string[] = [];
130133

134+
let processedCount = 0;
135+
let skippedCount = 0;
136+
131137
for (const filePath of mdxFiles) {
132138
try {
133-
console.log(`Processing: ${filePath}`);
134-
135139
const mdxContent = readMdxFile(filePath);
136140

141+
if (!hasFileChanged(filePath, mdxContent, hashCache)) {
142+
console.log(`Skipped (unchanged): ${filePath}`);
143+
skippedCount++;
144+
145+
const relativePath = getRelativePath(filePath, srcPagesPath);
146+
const mdFileName = relativePath.replace(/\.mdx$/, '.md');
147+
const outputFilePath = path.join(outputDir, mdFileName);
148+
149+
if (fs.existsSync(outputFilePath)) {
150+
const existingContent = fs.readFileSync(outputFilePath, 'utf-8');
151+
const urlPath = `llms/${mdFileName.replace(/\\/g, '/')}`;
152+
const url = `https://docs.liara.ir/${urlPath}`;
153+
154+
let title = mdFileName.replace(/\.md$/, '').replace(/\//g, ' > ');
155+
const headingMatch = existingContent.match(/^#\s+(.+)$/m);
156+
if (headingMatch) {
157+
title = headingMatch[1].trim();
158+
}
159+
160+
allLinks.push(`- [${title}](${url})`);
161+
}
162+
163+
continue;
164+
}
165+
166+
console.log(`Processing (new/modified): ${filePath}`);
167+
processedCount++;
168+
137169
const mdContent = convertMdxToMd(mdxContent);
138170

139-
// Extra step: Store the final MD output in informal_md variable
140171
const informal_md = mdContent;
141172

142-
// Process with AI to get overview
143173
console.log(`Processing with AI: ${filePath}`);
144174
const aiProcessedContent = await overviewByAI(informal_md);
145175

@@ -152,32 +182,26 @@ async function main() {
152182
fs.mkdirSync(outputFileDir, { recursive: true });
153183
}
154184

155-
// Build "original link" header (maps MD back to the human docs page)
156-
// Example:
157-
// relativePath: "ai/google-gemini.mdx"
158-
// original URL: "https://docs.liara.ir/ai/google-gemini/"
159185
let originalPath = relativePath.replace(/\.mdx$/, '');
160186
if (originalPath.endsWith('/index')) {
161187
originalPath = originalPath.slice(0, -('/index'.length));
162188
}
163189
const originalUrl = `https://docs.liara.ir/${originalPath}${originalPath.endsWith('/') ? '' : '/'}`;
164190
const originalHeader = `Original link: ${originalUrl}\n\n`;
165191

166-
// Add the "all links" section to the end of each MD file
167192
const finalMdContent =
168193
originalHeader +
169194
aiProcessedContent +
170195
'\n\n## all links\n\n[All links of docs](https://docs.liara.ir/all-links-llms.txt)\n';
171196

172-
// Write file with explicit UTF-8 encoding and BOM
173197
fs.writeFileSync(outputFilePath, '\ufeff' + finalMdContent, { encoding: 'utf8' });
174198
console.log(`Saved: ${outputFilePath}`);
175199

176-
// Generate URL for all-links.txt (keep .md extension for static files)
200+
updateFileHash(filePath, mdxContent, hashCache);
201+
177202
const urlPath = `llms/${mdFileName.replace(/\\/g, '/')}`;
178203
const url = `https://docs.liara.ir/${urlPath}`;
179204

180-
// Extract title from the first heading in the content or use filename
181205
let title = mdFileName.replace(/\.md$/, '').replace(/\//g, ' > ');
182206
const headingMatch = mdContent.match(/^#\s+(.+)$/m);
183207
if (headingMatch) {
@@ -191,21 +215,24 @@ async function main() {
191215
}
192216
}
193217

194-
// Generate all-links.txt content
195-
const allLinksContent = `# All Links\n\n${allLinks.sort().join('\n')}\n`;
196-
197-
// Write all-links.txt with explicit UTF-8 encoding and BOM
198-
fs.writeFileSync(allLinksPath, '\ufeff' + allLinksContent, { encoding: 'utf8' });
199-
console.log(`✅ All links saved to: ${allLinksPath}`);
200-
201-
console.log(`All files converted and saved to: ${outputDir}`);
202-
console.log(`Total files processed: ${mdxFiles.length}`);
218+
saveHashCache(hashCache);
219+
console.log('Saved hash cache');
220+
221+
const allLinksContent = `# All Links\n\n${allLinks.sort().join('\n')}\n`;
222+
223+
fs.writeFileSync(allLinksPath, '\ufeff' + allLinksContent, { encoding: 'utf8' });
224+
console.log(`All links saved to: ${allLinksPath}`);
225+
226+
console.log('\nSummary:');
227+
console.log(` Total files found: ${mdxFiles.length}`);
228+
console.log(` Processed (new/modified): ${processedCount}`);
229+
console.log(` Skipped (unchanged): ${skippedCount}`);
230+
console.log(` All files saved to: ${outputDir}`);
203231

204232
} catch (err) {
205233
console.error('Error:', err);
206234
process.exit(1);
207235
}
208236
}
209237

210-
// Run the main function
211238
main();
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import crypto from 'crypto';
2+
import fs from 'fs';
3+
import path from 'path';
4+
5+
/**
 * Location of the persisted hash cache: `.hash-cache.json` inside the
 * directory the process was started from.
 */
const CACHE_FILE = path.join(process.cwd(), '.hash-cache.json');

/** Maps a source file path to the hex MD5 digest recorded for its content. */
interface HashCache {
  [filePath: string]: string;
}

/**
 * Computes the hex-encoded MD5 digest of `content`.
 *
 * The digest is used only to detect content changes, not for security.
 *
 * @param content Text to fingerprint.
 * @returns 32-character lowercase hexadecimal digest.
 */
export function generateHash(content: string): string {
  return crypto.createHash('md5').update(content).digest('hex');
}

/**
 * Converts an arbitrary parsed JSON value into a well-formed cache.
 *
 * Anything that is not a plain object (`null`, arrays, primitives) yields an
 * empty cache, and entries whose value is not a string are dropped. Without
 * this, a cache file containing `null` would make every lookup throw, and one
 * containing `[]` would silently lose all hashes when serialised again.
 */
function toHashCache(parsed: unknown): HashCache {
  const cache: HashCache = {};
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return cache;
  }
  for (const [filePath, hash] of Object.entries(parsed)) {
    if (typeof hash === 'string') {
      cache[filePath] = hash;
    }
  }
  return cache;
}

/**
 * Reads the hash cache from `CACHE_FILE`.
 *
 * @returns The stored cache, or an empty cache when the file does not exist,
 *   cannot be read or parsed (a warning is logged), or does not contain a
 *   JSON object.
 */
export function loadHashCache(): HashCache {
  try {
    if (fs.existsSync(CACHE_FILE)) {
      const parsed: unknown = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf-8'));
      return toHashCache(parsed);
    }
  } catch (error) {
    console.warn('Failed to load hash cache, starting fresh:', error);
  }
  return {};
}

/**
 * Writes `cache` to `CACHE_FILE` as pretty-printed JSON.
 *
 * Failures are logged and not rethrown, so a cache that cannot be persisted
 * does not abort the caller.
 *
 * @param cache Cache to persist.
 */
export function saveHashCache(cache: HashCache): void {
  try {
    fs.writeFileSync(CACHE_FILE, JSON.stringify(cache, null, 2), 'utf-8');
  } catch (error) {
    console.error('Failed to save hash cache:', error);
  }
}

/**
 * Reports whether `content` differs from what the cache recorded for
 * `filePath`.
 *
 * @param filePath Cache key of the file.
 * @param content Current content of the file.
 * @param cache Cache to consult; it is not modified.
 * @returns `true` when there is no entry for `filePath` or the recorded
 *   digest does not match the digest of `content`; `false` otherwise.
 */
export function hasFileChanged(filePath: string, content: string, cache: HashCache): boolean {
  // A missing entry is `undefined`, which never equals a digest string, so a
  // single comparison covers both the "new file" and "modified file" cases.
  return cache[filePath] !== generateHash(content);
}

/**
 * Records the digest of `content` under `filePath`, mutating `cache` in place.
 *
 * @param filePath Cache key of the file.
 * @param content Content whose digest should be stored.
 * @param cache Cache to update.
 */
export function updateFileHash(filePath: string, content: string, cache: HashCache): void {
  cache[filePath] = generateHash(content);
}

mdx-to-md-converter/src/modules/overviewByAI.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { generateText } from 'ai';
44

55
export async function overviewByAI(informal_md: string): Promise<string> {
66
try {
7+
78
const baseURL = process.env.MY_BASE_URL;
89
const apiKey = process.env.MY_API_KEY;
910

@@ -16,8 +17,21 @@ export async function overviewByAI(informal_md: string): Promise<string> {
1617
name: 'liara-ai',
1718
baseURL,
1819
apiKey,
19-
}).chatModel("google/gemini-2.0-flash-001"),
20-
prompt: `convert the text below to the pure markdown file, do not change any content on it:\n\n${informal_md}`,
20+
}).chatModel("openai/gpt-4.1-mini"),
21+
prompt: `Convert the following incorrect Markdown content into standard Markdown. Do not rewrite, summarize, delete, add, or modify any content. Only remove or transform MDX elements into the valid Markdown.
22+
Input MDX:
23+
24+
\n\n${informal_md}\n\n
25+
26+
Output the result as pure Markdown only, with no explanations or commentary.
27+
28+
considaring:
29+
- If you encounter links with a full path (starting with \`/\`), rewrite them so they begin with: https://docs.liara.ir
30+
- If you encounter links with a relative path (starting with dot), DO NOT Change it
31+
- Treat <Card /> as lists in the resulting Markdown and only link matters (not icons)
32+
- Treat <Tab /> and their content as separated H2s.
33+
34+
`,
2135
});
2236

2337
return text;

0 commit comments

Comments
 (0)