diff --git a/components/bloomerang/bloomerang.app.mjs b/components/bloomerang/bloomerang.app.mjs
index ce26e01555960..2c7b573271969 100644
--- a/components/bloomerang/bloomerang.app.mjs
+++ b/components/bloomerang/bloomerang.app.mjs
@@ -8,4 +8,4 @@ export default {
       console.log(Object.keys(this.$auth));
     },
   },
-};
\ No newline at end of file
+};
diff --git a/components/firecrawl/actions/crawl-url/crawl-url.mjs b/components/firecrawl/actions/crawl-url/crawl-url.mjs
index 746982f06e738..d3141cbb8f1fc 100644
--- a/components/firecrawl/actions/crawl-url/crawl-url.mjs
+++ b/components/firecrawl/actions/crawl-url/crawl-url.mjs
@@ -5,7 +5,7 @@ export default {
   key: "firecrawl-crawl-url",
   name: "Crawl URL",
   description: "Crawls a given URL and returns the contents of sub-pages. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post)",
-  version: "1.0.1",
+  version: "1.0.2",
   type: "action",
   props: {
     firecrawl,
diff --git a/components/firecrawl/actions/extract-data/extract-data.mjs b/components/firecrawl/actions/extract-data/extract-data.mjs
new file mode 100644
index 0000000000000..123b533b77fa8
--- /dev/null
+++ b/components/firecrawl/actions/extract-data/extract-data.mjs
@@ -0,0 +1,100 @@
+import firecrawl from "../../firecrawl.app.mjs";
+import { ConfigurationError } from "@pipedream/platform";
+import { parseObjectEntries } from "../../common/utils.mjs";
+
+export default {
+  key: "firecrawl-extract-data",
+  name: "Extract Data",
+  description: "Extract structured data from one or multiple URLs. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    firecrawl,
+    urls: {
+      type: "string[]",
+      label: "URLs",
+      description: "An array of one or more URLs. Supports wildcards (/*) for broader crawling.",
+    },
+    prompt: {
+      type: "string",
+      label: "Prompt",
+      description: "(Optional unless no schema): A natural language prompt describing the data you want or specifying how you want that data structured.",
+      optional: true,
+    },
+    schema: {
+      type: "object",
+      label: "Schema",
+      description: "(Optional unless no prompt): A more rigid structure if you already know the JSON layout.",
+      optional: true,
+    },
+    enableWebSearch: {
+      type: "boolean",
+      label: "Enable Web Search",
+      description: "When `true`, the extraction will use web search to find additional data",
+      optional: true,
+    },
+    ignoreSitemap: {
+      type: "boolean",
+      label: "Ignore Sitemap",
+      description: "When true, sitemap.xml files will be ignored during website scanning",
+      optional: true,
+    },
+    includeSubdomains: {
+      type: "boolean",
+      label: "Include Subdomains",
+      description: "When true, subdomains of the provided URLs will also be scanned",
+      optional: true,
+    },
+    showSources: {
+      type: "boolean",
+      label: "Show Sources",
+      description: "When true, the sources used to extract the data will be included in the response",
+      optional: true,
+    },
+    waitForCompletion: {
+      type: "boolean",
+      label: "Wait For Completion",
+      description: "Set to `true` to poll the API in 3-second intervals until the job is completed",
+      optional: true,
+    },
+  },
+  async run({ $ }) {
+    // Both props are optional individually, but at least one must be set.
+    if (!this.prompt && !this.schema) {
+      throw new ConfigurationError("Must enter one of Prompt or Schema");
+    }
+
+    let response = await this.firecrawl.extract({
+      $,
+      data: {
+        urls: this.urls,
+        prompt: this.prompt,
+        schema: this.schema && parseObjectEntries(this.schema),
+        enableWebSearch: this.enableWebSearch,
+        ignoreSitemap: this.ignoreSitemap,
+        includeSubdomains: this.includeSubdomains,
+        showSources: this.showSources,
+      },
+    });
+
+    if (this.waitForCompletion) {
+      const id = response.id;
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      // Wait before each status check so the action returns as soon as a
+      // poll reports a status other than "processing" (no trailing delay).
+      do {
+        await timer(3000);
+        response = await this.firecrawl.getExtractStatus({
+          $,
+          id,
+        });
+      } while (response.status === "processing");
+    }
+
+    // The summary is only exported on success; the response is returned either way.
+    if (response.success) {
+      $.export("$summary", "Successfully extracted data.");
+    }
+    return response;
+  },
+};
diff --git a/components/firecrawl/actions/get-crawl-status/get-crawl-status.mjs b/components/firecrawl/actions/get-crawl-status/get-crawl-status.mjs
index da556ed111620..a7eb49c728c14 100644
--- a/components/firecrawl/actions/get-crawl-status/get-crawl-status.mjs
+++ b/components/firecrawl/actions/get-crawl-status/get-crawl-status.mjs
@@ -4,7 +4,7 @@ export default {
   key: "firecrawl-get-crawl-status",
   name: "Get Crawl Data",
   description: "Obtains the status and data from a previous crawl operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-get)",
-  version: "0.0.2",
+  version: "0.0.3",
   type: "action",
   props: {
     firecrawl,
diff --git a/components/firecrawl/actions/get-extract-status/get-extract-status.mjs b/components/firecrawl/actions/get-extract-status/get-extract-status.mjs
new file mode 100644
index 0000000000000..4f93d2ee7db18
--- /dev/null
+++ b/components/firecrawl/actions/get-extract-status/get-extract-status.mjs
@@ -0,0 +1,26 @@
+import firecrawl from "../../firecrawl.app.mjs";
+
+export default {
+  key: "firecrawl-get-extract-status",
+  name: "Get Extract Data",
+  description: "Obtains the status and data from a previous extract operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract-get)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    firecrawl,
+    extractId: {
+      type: "string",
+      label: "Extract Job ID",
+      description: "The ID of the extract job",
+    },
+  },
+  async run({ $ }) {
+    const response = await this.firecrawl.getExtractStatus({
+      $,
+      id: this.extractId,
+    });
+
+    $.export("$summary", `Successfully retrieved status for extract (ID: ${this.extractId})`);
+    return response;
+  },
+};
diff --git a/components/firecrawl/actions/scrape-page/scrape-page.mjs b/components/firecrawl/actions/scrape-page/scrape-page.mjs
index 29e796b9f268a..9f01378db4072 100644
--- a/components/firecrawl/actions/scrape-page/scrape-page.mjs
+++ b/components/firecrawl/actions/scrape-page/scrape-page.mjs
@@ -7,7 +7,7 @@ export default {
   name: "Scrape Page",
   description:
     "Scrapes a URL and returns content from that page. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/scrape)",
-  version: "1.0.0",
+  version: "1.0.1",
   type: "action",
   props: {
     firecrawl,
diff --git a/components/firecrawl/firecrawl.app.mjs b/components/firecrawl/firecrawl.app.mjs
index 7faf968f7dda3..1d075c3863b80 100644
--- a/components/firecrawl/firecrawl.app.mjs
+++ b/components/firecrawl/firecrawl.app.mjs
@@ -62,5 +62,20 @@ export default {
         ...opts,
       });
     },
+    extract(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/extract",
+        ...opts,
+      });
+    },
+    getExtractStatus({
+      id, ...opts
+    }) {
+      return this._makeRequest({
+        path: `/extract/${id}`,
+        ...opts,
+      });
+    },
   },
 };
diff --git a/components/firecrawl/package.json b/components/firecrawl/package.json
index f96f82bdfe1a3..c96fe632f2267 100644
--- a/components/firecrawl/package.json
+++ b/components/firecrawl/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/firecrawl",
-  "version": "1.0.1",
+  "version": "1.1.0",
   "description": "Pipedream FireCrawl Components",
   "main": "firecrawl.app.mjs",
   "keywords": [
diff --git a/components/hyperbrowser/hyperbrowser.app.mjs b/components/hyperbrowser/hyperbrowser.app.mjs
index 43b63ea3611c7..0a777809cb6bc 100644
--- a/components/hyperbrowser/hyperbrowser.app.mjs
+++ b/components/hyperbrowser/hyperbrowser.app.mjs
@@ -8,4 +8,4 @@ export default {
       console.log(Object.keys(this.$auth));
     },
   },
-};
\ No newline at end of file
+};
diff --git a/components/nutrient_workflow_automation/nutrient_workflow_automation.app.mjs b/components/nutrient_workflow_automation/nutrient_workflow_automation.app.mjs
index 5142a1a580b0a..0e23b6c915384 100644
--- a/components/nutrient_workflow_automation/nutrient_workflow_automation.app.mjs
+++ b/components/nutrient_workflow_automation/nutrient_workflow_automation.app.mjs
@@ -8,4 +8,4 @@ export default {
       console.log(Object.keys(this.$auth));
     },
   },
-};
\ No newline at end of file
+};
diff --git a/components/oracle_cloud_infrastructure/oracle_cloud_infrastructure.app.mjs b/components/oracle_cloud_infrastructure/oracle_cloud_infrastructure.app.mjs
index 8c901d0f0a783..001f007c40770 100644
--- a/components/oracle_cloud_infrastructure/oracle_cloud_infrastructure.app.mjs
+++ b/components/oracle_cloud_infrastructure/oracle_cloud_infrastructure.app.mjs
@@ -8,4 +8,4 @@ export default {
       console.log(Object.keys(this.$auth));
     },
   },
-};
\ No newline at end of file
+};