-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate-definitions.mjs
109 lines (93 loc) · 3.37 KB
/
create-definitions.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import EPub from 'epub';
import { promises as fs } from 'fs';
import { JSDOM } from 'jsdom';
import 'dotenv/config';
import OpenAI from 'openai';
// Module-wide OpenAI client, authenticated with the OPENAI_API_KEY
// environment variable.
const { OPENAI_API_KEY } = process.env;
const openai = new OpenAI({ apiKey: OPENAI_API_KEY });
/**
 * Loads one chapter through `epub.getChapter` and returns its text.
 *
 * The chapter HTML is parsed with JSDOM and the `textContent` of every
 * `<div>` element is joined with single spaces.
 * NOTE(review): a nested `<div>` contributes its text once for itself and
 * once for every enclosing `<div>`, so passages can repeat in the result.
 *
 * @param {object} epub - Object exposing `getChapter(id, callback)` where the
 *   callback receives `(err, htmlContent)`.
 * @param {string} chapterId - Identifier passed through to `epub.getChapter`.
 * @returns {Promise<string>} Resolves with the joined text. Rejects with the
 *   error reported by `getChapter`, or with any error thrown while parsing
 *   the HTML.
 */
function extractTextFromChapter(epub, chapterId) {
  return new Promise((resolve, reject) => {
    epub.getChapter(chapterId, (err, htmlContent) => {
      if (err) {
        reject(err);
        return;
      }
      // The callback may run outside the promise executor, so a throw here
      // would leave the promise pending and surface as an uncaught
      // exception. Convert it into a rejection instead.
      try {
        const { document } = new JSDOM(htmlContent).window;
        const text = Array.from(
          document.querySelectorAll('div'),
          (div) => div.textContent,
        ).join(' ');
        resolve(text);
      } catch (parseError) {
        reject(parseError);
      }
    });
  });
}
/**
 * Requests a one-sentence definition for each word from the OpenAI chat
 * completions API, one word at a time and in the order given.
 *
 * Progress is written to the console. When a request (or reading its reply)
 * fails, the error is logged and that word is left out of the result.
 *
 * @param {Iterable<string>} words - Words to define.
 * @param {string} title - Book title interpolated into the system prompt.
 * @param {string} author - Book author interpolated into the system prompt.
 * @returns {Promise<Array<{word: string, definition: string}>>} One entry per
 *   word that was defined successfully.
 */
async function getDefinitions(words, title, author) {
  // Evaluated inside the try block below so interpolation failures are
  // handled per word, like any other request failure.
  const buildSystemPrompt = () => `
You are a function that takes a word and returns a terse single sentence definition that DOES NOT explain context, ONLY the meaning.
The words are from the book "${title}" by ${author} and cannot be found in a dictionary.
DO NOT use the word at the start of the definition.
DO NOT use the name of the author, book or series.
Use other languages like Greek or Latin or mythological, religious or historical references to determine the meaning of the word.
`;

  const collected = [];
  for (const word of words) {
    console.log(`Defining "${word}"`);
    try {
      const messages = [
        { role: 'system', content: buildSystemPrompt() },
        { role: 'user', content: `${word}` },
      ];
      const completion = await openai.chat.completions.create({
        model: 'gpt-4-1106-preview',
        temperature: 0.5,
        max_tokens: 512,
        top_p: 0.5,
        frequency_penalty: 0,
        presence_penalty: 0,
        messages,
      });
      const definition = completion.choices[0].message.content.trim();
      console.log('Definition: ', definition);
      console.log('---');
      collected.push({ word, definition });
    } catch (error) {
      console.error(`Error while trying to define the word "${word}":`, error);
    }
  }
  return collected;
}
/**
 * Finds the words in an EPUB that are absent from a word list, asks
 * `getDefinitions` to define them, and writes the result to
 * `definitions.json` in the current working directory.
 *
 * Candidate words are lowercase ASCII letter runs, optionally followed by one
 * apostrophe- or hyphen-joined letter run; tokens that contain uppercase
 * letters or digits are not matched by the pattern.
 *
 * The word list is read as one entry per line. Entries are lowercased and
 * trimmed, and blank lines are ignored, so stray trailing whitespace in the
 * list does not cause a known word to be sent for definition.
 *
 * Any failure is logged with `console.error`; the returned promise still
 * resolves.
 *
 * @param {string} epubFile - Path handed to the `EPub` constructor.
 * @param {string} wordsFile - Path of the newline-separated list of known words.
 * @returns {Promise<void>}
 */
async function findUniqueWordsAndDefine(epubFile, wordsFile) {
  try {
    const epub = new EPub(epubFile);
    // Adapt the event-based parser to a promise.
    await new Promise((resolve, reject) => {
      epub.on('end', resolve);
      epub.on('error', reject);
      epub.parse();
    });
    const title = epub.metadata.title;
    const author = epub.metadata.creator;

    const chaptersText = await Promise.all(
      epub.flow.map((chapter) => extractTextFromChapter(epub, chapter.id)),
    );
    // The Set removes repeated occurrences of the same word.
    const epubWords = new Set(
      chaptersText.join(' ').match(/\b(?![IVXLCDM]+\b)(?!\d+\b)([a-z]+(?:['-][a-z]+)?)\b/g) || [],
    );

    const wordsText = await fs.readFile(wordsFile, 'utf-8');
    // Trim each entry (this also strips a leading byte-order mark) and drop
    // blank lines so only real words take part in the membership test.
    const knownWords = new Set(
      wordsText
        .toLowerCase()
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((entry) => entry !== ''),
    );

    const wordsToDefine = Array.from(epubWords)
      .filter((word) => !knownWords.has(word))
      .sort();

    const definitions = await getDefinitions(wordsToDefine, title, author);
    await fs.writeFile('definitions.json', JSON.stringify(definitions, null, 2), 'utf-8');
    console.log('Definitions saved to definitions.json');
  } catch (error) {
    console.error('Error processing files:', error);
  }
}
// CLI entry point: the first two user-supplied arguments are the EPUB path
// and the word-list path.
const [epubFile, wordsFile] = process.argv.slice(2);
findUniqueWordsAndDefine(epubFile, wordsFile);