|
| 1 | +import axios from 'axios'; |
| 2 | +import handleError from './utils/handleError.js'; |
| 3 | +import { ZodType } from 'zod'; |
| 4 | +import { zodToJsonSchema } from 'zod-to-json-schema'; |
| 5 | + |
/**
 * Submit raw HTML to the ScrapeGraph AI `localscraper` endpoint and ask it to
 * extract the data described by a natural-language prompt.
 *
 * @param {string} apiKey - API key, sent in the `SGAI-APIKEY` header.
 * @param {string} websiteHtml - HTML markup to scrape, sent as `website_html`.
 * @param {string} prompt - Description of the data to extract, sent as `user_prompt`.
 * @param {Object} [schema] - Optional Zod schema; converted with `zodToJsonSchema`
 *   and sent as `output_schema`. Any falsy value means "no schema".
 * @returns {Promise<*>} Resolves with `response.data` from the axios POST call.
 *   NOTE(review): axios normally parses JSON bodies, so this is presumably an
 *   object rather than a JSON string — confirm against the API. Resolves with
 *   `undefined` if `handleError` returns instead of throwing.
 * @throws {Error} If `schema` is truthy but not a `ZodType` instance, or
 *   whatever `handleError` raises for a failed request.
 */
export async function localScraper(apiKey, websiteHtml, prompt, schema = null) {
  // Reject unsupported schema objects up front, before any request is built.
  if (schema && !(schema instanceof ZodType)) {
    throw new Error('The schema must be an instance of a valid Zod schema');
  }

  const url = 'https://api.scrapegraphai.com/v1/localscraper';

  const requestConfig = {
    headers: {
      'accept': 'application/json',
      'SGAI-APIKEY': apiKey,
      'Content-Type': 'application/json',
    },
  };

  // `output_schema` is only present in the body when a schema was supplied.
  const body = {
    website_html: websiteHtml,
    user_prompt: prompt,
    ...(schema ? { output_schema: zodToJsonSchema(schema) } : {}),
  };

  try {
    return (await axios.post(url, body, requestConfig)).data;
  } catch (requestError) {
    // Failures are delegated to the shared handler imported at the top of the file.
    handleError(requestError);
  }
}
| 44 | + |
/**
 * Fetch the status or result of a previously submitted localScraper request.
 *
 * The request ID is percent-encoded before being placed in the URL path, so an
 * ID containing reserved characters (`/`, `?`, `#`, ...) cannot change which
 * endpoint is called or inject a query string. IDs made only of unreserved
 * characters (e.g. UUIDs) produce exactly the same URL as plain concatenation.
 *
 * @param {string} apiKey - API key, sent in the `SGAI-APIKEY` header.
 * @param {string} requestId - ID of the localScraper request to look up.
 * @returns {Promise<*>} Resolves with `response.data` from the axios GET call.
 *   NOTE(review): axios normally parses JSON bodies, so this is presumably an
 *   object rather than a JSON string — confirm against the API. Resolves with
 *   `undefined` if `handleError` returns instead of throwing.
 * @throws {URIError} If `requestId` contains a lone surrogate and cannot be encoded.
 * @throws {Error} Whatever `handleError` raises for a failed request.
 */
export async function getLocalScraperRequest(apiKey, requestId) {
  // Encode the ID as a single path segment rather than concatenating it raw.
  const endpoint = `https://api.scrapegraphai.com/v1/localscraper/${encodeURIComponent(requestId)}`;
  const headers = {
    'accept': 'application/json',
    'SGAI-APIKEY': apiKey,
  };

  try {
    const response = await axios.get(endpoint, { headers });
    return response.data;
  } catch (error) {
    // Failures are delegated to the shared handler imported at the top of the file.
    handleError(error);
  }
}
0 commit comments