Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Firecrawl - Extract actions #16069

Merged
merged 5 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/bloomerang/bloomerang.app.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ export default {
console.log(Object.keys(this.$auth));
},
},
};
};
2 changes: 1 addition & 1 deletion components/firecrawl/actions/crawl-url/crawl-url.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export default {
key: "firecrawl-crawl-url",
name: "Crawl URL",
description: "Crawls a given URL and returns the contents of sub-pages. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post)",
version: "1.0.1",
version: "1.0.2",
type: "action",
props: {
firecrawl,
Expand Down
96 changes: 96 additions & 0 deletions components/firecrawl/actions/extract-data/extract-data.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import firecrawl from "../../firecrawl.app.mjs";
import { ConfigurationError } from "@pipedream/platform";
import { parseObjectEntries } from "../../common/utils.mjs";

/**
 * Firecrawl "Extract Data" action.
 *
 * Submits an extract job for one or more URLs via `firecrawl.extract` and,
 * when `waitForCompletion` is set, polls `firecrawl.getExtractStatus` until
 * the reported status is no longer "processing".
 *
 * At least one of `prompt` or `schema` must be supplied; otherwise a
 * ConfigurationError is thrown before any request is made.
 */
export default {
  key: "firecrawl-extract-data",
  name: "Extract Data",
  description: "Extract structured data from one or multiple URLs. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract)",
  version: "0.0.1",
  type: "action",
  props: {
    firecrawl,
    urls: {
      type: "string[]",
      label: "URLs",
      description: "An array of one or more URLs. Supports wildcards (/*) for broader crawling.",
    },
    prompt: {
      type: "string",
      label: "Prompt",
      description: "(Optional unless no schema): A natural language prompt describing the data you want or specifying how you want that data structured.",
      optional: true,
    },
    schema: {
      type: "object",
      label: "Schema",
      description: "(Optional unless no prompt): A more rigid structure if you already know the JSON layout.",
      optional: true,
    },
    enableWebSearch: {
      type: "boolean",
      label: "Enable Web Search",
      description: "When `true`, the extraction will use web search to find additional data",
      optional: true,
    },
    ignoreSitemap: {
      type: "boolean",
      label: "Ignore Sitemap",
      description: "When true, sitemap.xml files will be ignored during website scanning",
      optional: true,
    },
    includeSubdomains: {
      type: "boolean",
      label: "Include Subdomains",
      description: "When true, subdomains of the provided URLs will also be scanned",
      optional: true,
    },
    showSources: {
      type: "boolean",
      label: "Show Sources",
      description: "When true, the sources used to extract the data will be included in the response",
      optional: true,
    },
    waitForCompletion: {
      type: "boolean",
      label: "Wait For Completion",
      description: "Set to `true` to poll the API in 3-second intervals until the job is completed",
      optional: true,
    },
  },
  /**
   * Starts the extract job and optionally waits for it to finish.
   *
   * @param {object} ctx - Step context; `ctx.$` is forwarded to the app
   *   methods and used to export the step summary.
   * @returns {Promise<object>} The submit response, or the last status
   *   response when `waitForCompletion` is enabled and a job id was returned.
   * @throws {ConfigurationError} When neither `prompt` nor `schema` is set.
   */
  async run({ $ }) {
    // Delay between status checks while the job is still processing.
    const POLL_INTERVAL_MS = 3000;

    if (!this.prompt && !this.schema) {
      throw new ConfigurationError("Must enter one of Prompt or Schema");
    }

    let response = await this.firecrawl.extract({
      $,
      data: {
        urls: this.urls,
        prompt: this.prompt,
        // Schema is only transformed when provided; otherwise it is passed through as-is (undefined).
        schema: this.schema && parseObjectEntries(this.schema),
        enableWebSearch: this.enableWebSearch,
        ignoreSitemap: this.ignoreSitemap,
        includeSubdomains: this.includeSubdomains,
        showSources: this.showSources,
      },
    });

    // Without a job id there is nothing to poll (it would hit `/extract/undefined`),
    // so the submit response is returned unchanged in that case.
    if (this.waitForCompletion && response.id) {
      const { id } = response;
      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      const fetchStatus = () => this.firecrawl.getExtractStatus({
        $,
        id,
      });

      // Check first, then sleep only while the job is still running, so the
      // step returns as soon as a non-"processing" status is observed instead
      // of idling for one extra interval after the final check.
      response = await fetchStatus();
      while (response.status === "processing") {
        await sleep(POLL_INTERVAL_MS);
        response = await fetchStatus();
      }
    }

    if (response.success) {
      $.export("$summary", "Successfully extracted data.");
    }
    return response;
  },
};
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export default {
key: "firecrawl-get-crawl-status",
name: "Get Crawl Data",
description: "Obtains the status and data from a previous crawl operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-get)",
version: "0.0.2",
version: "0.0.3",
type: "action",
props: {
firecrawl,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import firecrawl from "../../firecrawl.app.mjs";

/**
 * Firecrawl "Get Extract Data" action.
 *
 * Looks up a previously started extract job by its ID through
 * `firecrawl.getExtractStatus` and returns whatever that call resolves to.
 */
export default {
  key: "firecrawl-get-extract-status",
  name: "Get Extract Data",
  description: "Obtains the status and data from a previous extract operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract-get)",
  version: "0.0.1",
  type: "action",
  props: {
    firecrawl,
    extractId: {
      type: "string",
      label: "Extract Job ID",
      description: "The ID of the extract job",
    },
  },
  /**
   * Fetches the extract job status and exports a summary naming the job ID.
   *
   * @param {object} ctx - Step context; `ctx.$` is forwarded to the app
   *   method and used to export the step summary.
   * @returns {Promise<object>} The response of the status request.
   */
  async run({ $ }) {
    const { extractId } = this;

    const jobStatus = await this.firecrawl.getExtractStatus({
      $,
      id: extractId,
    });

    const summary = `Successfully retrieved status for extract (ID: ${extractId})`;
    $.export("$summary", summary);

    return jobStatus;
  },
};
2 changes: 1 addition & 1 deletion components/firecrawl/actions/scrape-page/scrape-page.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export default {
name: "Scrape Page",
description:
"Scrapes a URL and returns content from that page. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/scrape)",
version: "1.0.0",
version: "1.0.1",
type: "action",
props: {
firecrawl,
Expand Down
15 changes: 15 additions & 0 deletions components/firecrawl/firecrawl.app.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,20 @@ export default {
...opts,
});
},
extract(opts = {}) {
return this._makeRequest({
method: "POST",
path: "/extract",
...opts,
});
},
getExtractStatus({
id, ...opts
}) {
return this._makeRequest({
path: `/extract/${id}`,
...opts,
});
},
},
};
2 changes: 1 addition & 1 deletion components/firecrawl/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pipedream/firecrawl",
"version": "1.0.1",
"version": "1.1.0",
"description": "Pipedream FireCrawl Components",
"main": "firecrawl.app.mjs",
"keywords": [
Expand Down
2 changes: 1 addition & 1 deletion components/hyperbrowser/hyperbrowser.app.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ export default {
console.log(Object.keys(this.$auth));
},
},
};
};
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ export default {
console.log(Object.keys(this.$auth));
},
},
};
};
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ export default {
console.log(Object.keys(this.$auth));
},
},
};
};
Loading