diff --git a/src/config.ts b/src/config.ts index 787744ce..13178e47 100644 --- a/src/config.ts +++ b/src/config.ts @@ -26,6 +26,15 @@ export const configSchema = z.object({ * @default "" */ exclude: z.string().or(z.array(z.string())).optional(), + /** + * Set Crawlee strategy to check certain parts of the URLs found. + * @example "same-origin" + * @default "same-hostname" + * @see https://crawlee.dev/api/core/enum/EnqueueStrategy + */ + crawlStrategy: z + .enum(["all", "same-origin", "same-hostname", "same-domain"]) + .optional(), /** * Selector to grab the inner text from * @example ".docs-builder-container" diff --git a/src/core.ts b/src/core.ts index 05a9f8e3..07519aff 100644 --- a/src/core.ts +++ b/src/core.ts @@ -96,7 +96,11 @@ export async function crawl(config: Config) { exclude: typeof config.exclude === "string" ? [config.exclude] - : config.exclude ?? [], + : (config.exclude ?? []), + strategy: + typeof config.crawlStrategy === "string" + ? config.crawlStrategy + : undefined, }); }, // Comment this option to scrape the full website.