Skip to content

Commit 8734ff2

Browse files
committed
version with all updated
1 parent d131aa0 commit 8734ff2

File tree

6 files changed

+26
-19
lines changed

6 files changed

+26
-19
lines changed

ingesters/__tests__/AsciiDocIngester.test.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ describe('AsciiDocIngester', () => {
6666
bookConfig: {
6767
repoOwner: 'test-owner',
6868
repoName: 'test-repo',
69-
fileExtensions: '.adoc',
69+
fileExtensions: ['.adoc'],
7070
chunkSize: 1000,
7171
chunkOverlap: 200,
7272
baseUrl: 'https://example.com',
@@ -300,7 +300,7 @@ This is page 2 content.`,
300300
bookConfig: {
301301
repoOwner: 'test-owner',
302302
repoName: 'test-repo',
303-
fileExtensions: '.adoc',
303+
fileExtensions: ['.adoc'],
304304
chunkSize: 1000,
305305
chunkOverlap: 200,
306306
baseUrl: '',

ingesters/__tests__/MarkdownIngester.test.ts

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ const markdownIngester = new TestMarkdownIngester(
1515
repoOwner: 'test',
1616
repoName: 'test',
1717
baseUrl: 'https://test.com',
18-
fileExtensions: 'md',
18+
fileExtensions: ['md'],
1919
urlSuffix: '.html',
2020
chunkSize: 1000,
2121
chunkOverlap: 100,
@@ -311,7 +311,7 @@ describe('URL sourcing and generation', () => {
311311
repoOwner: 'test',
312312
repoName: 'test',
313313
baseUrl: 'https://docs.example.com',
314-
fileExtensions: 'md',
314+
fileExtensions: ['md'],
315315
urlSuffix: '.html',
316316
chunkSize: 1000,
317317
chunkOverlap: 100,
@@ -342,7 +342,7 @@ describe('URL sourcing and generation', () => {
342342
repoOwner: 'test',
343343
repoName: 'test',
344344
baseUrl: 'https://docs.starknet.io',
345-
fileExtensions: 'md',
345+
fileExtensions: ['md'],
346346
urlSuffix: '',
347347
chunkSize: 1000,
348348
chunkOverlap: 100,
@@ -379,7 +379,7 @@ describe('URL sourcing and generation', () => {
379379
repoOwner: 'test',
380380
repoName: 'test',
381381
baseUrl: 'https://book.cairo-lang.org',
382-
fileExtensions: 'md',
382+
fileExtensions: ['md'],
383383
urlSuffix: '.html',
384384
chunkSize: 1000,
385385
chunkOverlap: 100,

ingesters/src/IngesterFactory.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,8 @@ export class IngesterFactory {
4949
case 'scarb_docs':
5050
return new ScarbDocsIngester();
5151

52-
case 'starknet_js':
53-
return new StarknetJSIngester();
52+
// case 'starknet_js':
53+
// return new StarknetJSIngester();
5454

5555
case 'starknet_blog':
5656
return new StarknetBlogIngester();

ingesters/src/ingesters/StarknetJSIngester.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ export class StarknetJSIngester extends MarkdownIngester {
2626
useUrlMapping: true,
2727
};
2828

29-
super(config, DocumentSource.STARKNET_JS);
29+
super(config, DocumentSource.CAIRO_BOOK);
3030
}
3131

3232
protected getExtractDir(): string {

ingesters/src/types/index.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ export enum DocumentSource {
1515
OPENZEPPELIN_DOCS = 'openzeppelin_docs',
1616
CORELIB_DOCS = 'corelib_docs',
1717
SCARB_DOCS = 'scarb_docs',
18-
STARKNET_JS = 'starknet_js',
18+
// STARKNET_JS = 'starknet_js',
1919
STARKNET_BLOG = 'starknet_blog',
2020
DOJO_DOCS = 'dojo_docs',
2121
}

ingesters/src/utils/fileUtils.ts

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -26,20 +26,27 @@ export async function processDocFiles(
2626

2727
for (const entry of entries) {
2828
const fullPath = path.join(dir, entry.name);
29-
for (const fileExtension of config.fileExtensions) {
30-
if (entry.isDirectory()) {
31-
// Recursively process subdirectories
32-
await processDirectory(fullPath);
33-
} else if (
34-
entry.isFile() &&
35-
path.extname(entry.name).toLowerCase() === fileExtension
36-
) {
29+
30+
if (entry.isDirectory()) {
31+
// Recursively process subdirectories
32+
await processDirectory(fullPath);
33+
} else if (entry.isFile()) {
34+
// Check if the file matches any of the configured extensions
35+
const fileExt = path.extname(entry.name).toLowerCase();
36+
if (config.fileExtensions.includes(fileExt)) {
3737
// Process documentation files
3838
const content = await fs.readFile(fullPath, 'utf8');
39+
40+
// Skip empty files
41+
if (content.trim().length === 0) {
42+
logger.warn(`Skipping empty file: ${fullPath}`);
43+
continue;
44+
}
45+
3946
pages.push({
4047
name: path
4148
.relative(directory, fullPath)
42-
.replace(fileExtension, ''),
49+
.replace(fileExt, ''),
4350
content,
4451
});
4552
}

0 commit comments

Comments
 (0)