Skip to content

Commit 87b9252

Browse files
authored
Merge pull request #21 from ScrapeGraphAI/pre/beta
Pre/beta
2 parents 6de5eb2 + b7330cb commit 87b9252

File tree

14 files changed

+370
-183
lines changed

14 files changed

+370
-183
lines changed

.github/workflows/python-publish.yml

-4
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,11 @@ name: Upload Python Package
66
on:
77
release:
88
types: [published]
9-
paths:
10-
- 'scrapegraph-py/**'
119

1210
jobs:
1311
deploy:
1412

1513
runs-on: ubuntu-latest
16-
# Only run if scrapegraph-py has changes
17-
if: contains(github.event.release.body, 'scrapegraph-py/')
1814

1915
steps:
2016
- uses: actions/checkout@v4

.github/workflows/release.yml

-4
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,11 @@ on:
44
branches:
55
- main
66
- pre/*
7-
paths:
8-
- 'scrapegraph-py/**'
97

108
jobs:
119
build:
1210
name: Build
1311
runs-on: ubuntu-latest
14-
# Only run if scrapegraph-py has changes
15-
if: contains(github.event.head_commit.modified, 'scrapegraph-py/') || contains(github.event.head_commit.added, 'scrapegraph-py/') || contains(github.event.head_commit.removed, 'scrapegraph-py/')
1612
steps:
1713
- name: Install git
1814
run: |

scrapegraph-js/README.md

+33-2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ yarn add scrapegraph-js
3636

3737
```javascript
3838
import { smartScraper } from 'scrapegraph-js';
39+
import 'dotenv/config';
3940

4041
// Initialize variables
4142
const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable
@@ -106,12 +107,43 @@ const schema = z.object({
106107
})();
107108
```
108109

110+
### Scraping local HTML
111+
112+
Extract structured data from local HTML content
113+
114+
```javascript
115+
import { localScraper } from 'scrapegraph-js';
116+
117+
const apiKey = 'your_api_key';
118+
const prompt = 'What does the company do?';
119+
120+
const websiteHtml = `<html>
121+
<body>
122+
<h1>Company Name</h1>
123+
<p>We are a technology company focused on AI solutions.</p>
124+
<div class="contact">
125+
<p>Email: [email protected]</p>
126+
</div>
127+
</body>
128+
</html>`;
129+
(async () => {
130+
try {
131+
const response = await localScraper(apiKey, websiteHtml, prompt);
132+
console.log(response);
133+
} catch (error) {
134+
console.error(error);
135+
}
136+
})();
137+
```
138+
109139
### Markdownify
140+
110141
Converts a webpage into clean, well-structured markdown format.
142+
111143
```javascript
112144
import { markdownify } from 'scrapegraph-js';
113145

114-
const apiKey = "your_api_key";
146+
const apiKey = 'your_api_key';
115147
const url = 'https://scrapegraphai.com/';
116148

117149
(async () => {
@@ -124,7 +156,6 @@ const url = 'https://scrapegraphai.com/';
124156
})();
125157
```
126158

127-
128159
### Checking API Credits
129160

130161
```javascript
import { localScraper, getLocalScraperRequest } from 'scrapegraph-js';
import 'dotenv/config';

// Example 1: submit local HTML content for extraction with localScraper.
// The API key is read from the SGAI_APIKEY environment variable (via dotenv).
const apiKey = process.env.SGAI_APIKEY;
const prompt = 'What does the company do?';

const websiteHtml = `<html>
  <body>
    <h1>Company Name</h1>
    <p>We are a technology company focused on AI solutions.</p>
    <div class="contact">
      <p>Email: [email protected]</p>
    </div>
  </body>
</html>`;

try {
  const response = await localScraper(apiKey, websiteHtml, prompt);
  console.log(response);
} catch (error) {
  console.error(error);
}

// Example 2: fetch the status/result of a previous request with getLocalScraperRequest.
const requestId = 'b8d97545-9ed3-441b-a01f-4b661b4f0b4c';

try {
  const response = await getLocalScraperRequest(apiKey, requestId);
  console.log(response);
} catch (error) {
  // Use console.error for failures, consistent with the first example.
  console.error(error);
}
import { localScraper } from 'scrapegraph-js';
import { z } from 'zod';
import 'dotenv/config';

// Demonstrates localScraper with a Zod output schema.
// The API key comes from the SGAI_APIKEY environment variable (loaded by dotenv).
const apiKey = process.env.SGAI_APIKEY;
const prompt = 'extract contact';

const websiteHtml = `<html>
  <body>
    <h1>Company Name</h1>
    <p>We are a technology company focused on AI solutions.</p>
    <div class="contact">
      <p>Email: [email protected]</p>
    </div>
  </body>
</html>`;

// Shape of the structured output we want back from the API.
const schema = z.object({
  contact: z.string().describe('email contact'),
});

try {
  const result = await localScraper(apiKey, websiteHtml, prompt, schema);
  console.log(result);
} catch (error) {
  console.error(error);
}

scrapegraph-js/index.js

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js';
22
export { markdownify, getMarkdownifyRequest } from './src/markdownify.js';
3+
export { localScraper, getLocalScraperRequest } from './src/localScraper.js';
34
export { getCredits } from './src/credits.js';
45
export { sendFeedback } from './src/feedback.js';

scrapegraph-js/src/localScraper.js

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import axios from 'axios';
2+
import handleError from './utils/handleError.js';
3+
import { ZodType } from 'zod';
4+
import { zodToJsonSchema } from 'zod-to-json-schema';
5+
6+
/**
7+
* Extract structured data from local HTML content using ScrapeGraph AI.
8+
*
9+
* @param {string} apiKey - The API key for ScrapeGraph AI.
10+
* @param {string} websiteHtml - HTML content as a string from the local web page to scrape.
11+
* @param {string} prompt - A natural language description of the data to extract.
12+
* @param {Object} [schema] - (Optional) Schema object defining the structure of the desired output.
13+
* @returns {Promise<string>} A JSON string containing the extracted data, formatted to match the schema.
14+
* @throws {Error} If an HTTP error or validation issue occurs.
15+
*/
16+
export async function localScraper(apiKey, websiteHtml, prompt, schema = null) {
17+
const endpoint = 'https://api.scrapegraphai.com/v1/localscraper';
18+
const headers = {
19+
'accept': 'application/json',
20+
'SGAI-APIKEY': apiKey,
21+
'Content-Type': 'application/json',
22+
};
23+
24+
const payload = {
25+
website_html: websiteHtml,
26+
user_prompt: prompt,
27+
};
28+
29+
if (schema) {
30+
if (schema instanceof ZodType) {
31+
payload.output_schema = zodToJsonSchema(schema);
32+
} else {
33+
throw new Error('The schema must be an instance of a valid Zod schema');
34+
}
35+
}
36+
37+
try {
38+
const response = await axios.post(endpoint, payload, { headers });
39+
return response.data;
40+
} catch (error) {
41+
handleError(error);
42+
}
43+
}
44+
/**
 * Retrieve the status or result of a localScraper request, including results of previous requests.
 *
 * @param {string} apiKey - The API key for ScrapeGraph AI.
 * @param {string} requestId - The unique ID associated with the localScraper request.
 * @returns {Promise<string>} A JSON string containing the status or result of the scraping request.
 * @throws {Error} If an error occurs while retrieving the request details.
 */
export async function getLocalScraperRequest(apiKey, requestId) {
  // Same string as 'https://api.scrapegraphai.com/v1/localscraper/' + requestId.
  const endpoint = `https://api.scrapegraphai.com/v1/localscraper/${requestId}`;

  const headers = {
    'accept': 'application/json',
    'SGAI-APIKEY': apiKey,
  };

  try {
    const result = await axios.get(endpoint, { headers });
    return result.data;
  } catch (error) {
    handleError(error);
  }
}

scrapegraph-js/src/markdownify.js

+5-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import handleError from './utils/handleError.js';
99
* @returns {Promise<string>} A promise that resolves to the markdown representation of the webpage.
1010
* @throws {Error} Throws an error if the HTTP request fails.
1111
*/
12-
export async function markdownify(apiKey, url){
12+
export async function markdownify(apiKey, url) {
1313
const endpoint = 'https://api.scrapegraphai.com/v1/markdownify';
1414
const headers = {
1515
'accept': 'application/json',
@@ -24,7 +24,7 @@ export async function markdownify(apiKey, url){
2424
const response = await axios.post(endpoint, payload, { headers });
2525
return response.data;
2626
} catch (error) {
27-
handleError(error)
27+
handleError(error);
2828
}
2929
}
3030

@@ -36,7 +36,7 @@ export async function markdownify(apiKey, url){
3636
* @returns {Promise<string>} A promise that resolves with details about the status or outcome of the specified request.
3737
* @throws {Error} Throws an error if the HTTP request fails.
3838
*/
39-
export async function getMarkdownifyRequest(apiKey, requestId){
39+
export async function getMarkdownifyRequest(apiKey, requestId) {
4040
const endpoint = 'https://api.scrapegraphai.com/v1/markdownify/' + requestId;
4141
const headers = {
4242
'accept': 'application/json',
@@ -47,6 +47,6 @@ export async function getMarkdownifyRequest(apiKey, requestId){
4747
const response = await axios.get(endpoint, { headers });
4848
return response.data;
4949
} catch (error) {
50-
handleError(error)
50+
handleError(error);
5151
}
52-
}
52+
}

scrapegraph-py/CHANGELOG.md

+45
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,48 @@
1+
## [1.9.0-beta.5](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.4...v1.9.0-beta.5) (2025-01-03)
2+
3+
4+
### Bug Fixes
5+
6+
* updated hatchling version ([740933a](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/740933aff79a5873e6d1c633afcedb674d1f4cf0))
7+
8+
## [1.9.0-beta.4](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.3...v1.9.0-beta.4) (2025-01-03)
9+
10+
11+
### Bug Fixes
12+
13+
* improve api desc ([62243f8](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/62243f84384ae238c0bd0c48abc76a6b99376c74))
14+
15+
## [1.9.0-beta.3](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.2...v1.9.0-beta.3) (2024-12-10)
16+
17+
18+
### Bug Fixes
19+
20+
* come back to py 3.10 ([26d3a75](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/26d3a75ed973590e21d55c985bf71f3905a3ac0e))
21+
22+
## [1.9.0-beta.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.9.0-beta.1...v1.9.0-beta.2) (2024-12-10)
23+
24+
25+
### Bug Fixes
26+
27+
* add new python compatibility ([77b67f6](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/77b67f646d75abd3a558b40cb31c52c12cc7182e))
28+
29+
## [1.9.0-beta.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.8.0...v1.9.0-beta.1) (2024-12-10)
30+
31+
32+
### Features
33+
34+
* add localScraper functionality ([8701eb2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/8701eb2ca7f108b922eb1617c850a58c0f88f8f9))
35+
* revert to old release ([d88a3ac](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/d88a3ac6969a0abdf1f6b8eccde9ad8284d41d20))
36+
37+
38+
### Bug Fixes
39+
40+
* .toml file ([e719881](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/e7198817d8dac802361ab84bc4d5d961fb926767))
41+
* add revert ([09257e0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/09257e08246d8aee96b3944ac14cc14b88e5f818))
42+
* minor fix version ([0b972c6](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/0b972c69a9ea843d8ec89327f35c287b0d7a2bb4))
43+
* pyproject ([2440f7f](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/2440f7f2a5179c6e3a86faf4eefa1d5edf7524c8))
44+
* python version ([24366b0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/24366b08eefe0789da9a0ccafb8058e8744ee58b))
45+
146
## [1.8.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.7.0...v1.8.0) (2024-12-08)
247

348

scrapegraph-py/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ disallow_untyped_calls = true
9292
ignore_missing_imports = true
9393

9494
[build-system]
95-
requires = ["hatchling"]
95+
requires = ["hatchling==1.26.3"]
9696
build-backend = "hatchling.build"
9797

9898
[tool.poe.tasks]

0 commit comments

Comments
 (0)