4 changes: 4 additions & 0 deletions crewai_tools/tools/__init__.py
@@ -86,6 +86,10 @@
from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import (
    ScrapegraphScrapeTool,
    ScrapegraphScrapeToolSchema,
    FixedScrapegraphScrapeToolSchema,
    ScrapeMethod,
    ScrapegraphError,
    RateLimitError,
)
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
    ScrapflyScrapeWebsiteTool,
302 changes: 257 additions & 45 deletions crewai_tools/tools/scrapegraph_scrape_tool/README.md
@@ -1,84 +1,296 @@
# ScrapegraphScrapeTool
# ScrapeGraph AI Multi-Method Scraper Tool

## Description
A tool that leverages Scrapegraph AI's SmartScraper API to intelligently extract content from websites. This tool provides advanced web scraping capabilities with AI-powered content extraction, making it ideal for targeted data collection and content analysis tasks.
A comprehensive CrewAI tool that integrates with ScrapeGraph AI to provide intelligent web scraping capabilities using multiple methods. This enhanced tool supports 6 different scraping approaches, from basic content extraction to complex interactive automation.

## Features

The tool supports 6 different scraping methods:

### 1. SmartScraper (Default)
Intelligent content extraction using AI to understand and extract relevant information from web pages.

```python
from crewai_tools import ScrapegraphScrapeTool, ScrapeMethod

tool = ScrapegraphScrapeTool()
result = tool.run(
    website_url="https://example.com",
    method=ScrapeMethod.SMARTSCRAPER,
    user_prompt="Extract company information"
)
```

### 2. SearchScraper
Search-based content gathering from multiple sources across the web.

```python
result = tool.run(
    method=ScrapeMethod.SEARCHSCRAPER,
    user_prompt="Latest AI developments",
    num_results=5  # 1-20 sources
)
```

### 3. AgenticScraper
Interactive scraping with automated actions like clicking buttons, filling forms, etc.

```python
result = tool.run(
    website_url="https://example.com",
    method=ScrapeMethod.AGENTICSCRAPER,
    steps=[
        "Type [email protected] in email field",
        "Type password123 in password field",
        "Click login button"
    ],
    use_session=True,
    ai_extraction=True,
    user_prompt="Extract dashboard information"
)
```

### 4. Crawl
Multi-page crawling with depth control and domain restrictions.

```python
result = tool.run(
    website_url="https://example.com",
    method=ScrapeMethod.CRAWL,
    user_prompt="Extract all product information",
    depth=2,
    max_pages=10,
    same_domain_only=True,
    cache_website=True
)
```

### 5. Scrape
Raw HTML extraction with JavaScript rendering support.

```python
result = tool.run(
    website_url="https://example.com",
    method=ScrapeMethod.SCRAPE,
    render_heavy_js=True,
    headers={"User-Agent": "Custom Agent"}
)
```

### 6. Markdownify
Convert web content to markdown format.

```python
result = tool.run(
    website_url="https://example.com",
    method=ScrapeMethod.MARKDOWNIFY
)
```

## Installation
Install the required packages:
```shell
pip install 'crewai[tools]'
pip install scrapegraph-py
```

## Schema Support

All methods support structured data extraction using JSON schemas:

```python
schema = {
"type": "object",
"properties": {
"title": {"type": "string"},
"content": {"type": "string"},
"authors": {
"type": "array",
"items": {"type": "string"}
}
}
}

result = tool.run(
website_url="https://example.com",
method=ScrapeMethod.SMARTSCRAPER,
data_schema=schema
)
```

## Configuration Options

- `method`: Scraping method (ScrapeMethod enum)
- `render_heavy_js`: Enable JavaScript rendering for dynamic content
- `headers`: Custom HTTP headers
- `data_schema`: JSON schema for structured data extraction
- `steps`: Action steps for agentic scraping (required for AgenticScraper)
- `num_results`: Number of search results (1-20, for SearchScraper)
- `depth`: Crawling depth (1-5, for Crawl)
- `max_pages`: Maximum pages to crawl
- `same_domain_only`: Restrict crawling to same domain
- `cache_website`: Cache content for faster subsequent requests
- `use_session`: Maintain session state for agentic scraping
- `ai_extraction`: Enable AI extraction for agentic scraping
- `timeout`: Request timeout (10-600 seconds)
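
These options can be combined in a single `run()` call. A minimal sketch (values are placeholders, and each method only accepts the parameters relevant to it, as shown in the per-method sections above):

```python
# Illustrative combination of the options listed above; values are placeholders.
result = tool.run(
    website_url="https://example.com",
    method=ScrapeMethod.SCRAPE,
    render_heavy_js=True,                        # render dynamic JavaScript content
    headers={"User-Agent": "crewai-docs-demo"},  # custom HTTP headers
    timeout=120,                                 # seconds, within the documented 10-600 range
)
```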

## Setup

1. Set your API key:
```bash
export SCRAPEGRAPH_API_KEY="your-api-key-here"
```

## Example Usage
Or use a `.env` file:
```
SCRAPEGRAPH_API_KEY=your-api-key-here
```
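
If you keep the key in a `.env` file, one way to load it before initializing the tool is with `python-dotenv` (an optional helper, not a dependency of this tool); a minimal sketch:

```python
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads SCRAPEGRAPH_API_KEY from the local .env file
if not os.getenv("SCRAPEGRAPH_API_KEY"):
    raise RuntimeError("SCRAPEGRAPH_API_KEY is not set")
```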

### Basic Usage
2. Initialize the tool:
```python
from crewai_tools import ScrapegraphScrapeTool
from crewai_tools import ScrapegraphScrapeTool, ScrapeMethod

# Basic initialization
tool = ScrapegraphScrapeTool()

# Basic usage with API key
tool = ScrapegraphScrapeTool(api_key="your_api_key")
# With fixed URL
tool = ScrapegraphScrapeTool(website_url="https://example.com")

# With custom API key
tool = ScrapegraphScrapeTool(api_key="your-api-key")
```

## Advanced Examples

### Interactive Form Automation
```python
result = tool.run(
website_url="https://www.example.com",
user_prompt="Extract the main heading and summary"
website_url="https://dashboard.example.com",
method=ScrapeMethod.AGENTICSCRAPER,
steps=[
"Type [email protected] in the email input field",
"Type mypassword in the password input field",
"Click the login button",
"Wait for the dashboard to load",
"Click on the profile section"
],
use_session=True,
ai_extraction=True,
user_prompt="Extract user profile information and account details"
)
```

### Fixed Website URL
### Multi-Source Research
```python
# Initialize with a fixed website URL
tool = ScrapegraphScrapeTool(
website_url="https://www.example.com",
api_key="your_api_key"
result = tool.run(
method=ScrapeMethod.SEARCHSCRAPER,
user_prompt="Latest developments in web scraping technology and tools",
num_results=10
)
result = tool.run()
print(f"Research findings: {result['result']}")
print(f"Sources: {result['reference_urls']}")
```

### Custom Prompt
### Comprehensive Website Crawling
```python
# With custom prompt
tool = ScrapegraphScrapeTool(
api_key="your_api_key",
user_prompt="Extract all product prices and descriptions"
result = tool.run(
website_url="https://company.com",
method=ScrapeMethod.CRAWL,
user_prompt="Extract all product information, pricing, and company details",
depth=3,
max_pages=20,
same_domain_only=True,
cache_website=True,
data_schema={
"type": "object",
"properties": {
"products": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"price": {"type": "string"},
"description": {"type": "string"}
}
}
},
"company_info": {
"type": "object",
"properties": {
"name": {"type": "string"},
"description": {"type": "string"},
"contact": {"type": "string"}
}
}
}
}
)
result = tool.run(website_url="https://www.example.com")
```

### Error Handling
## Error Handling

The tool handles various error conditions:

```python
from crewai_tools import ScrapegraphScrapeTool, ScrapeMethod, ScrapegraphError, RateLimitError

try:
    tool = ScrapegraphScrapeTool(api_key="your_api_key")
    tool = ScrapegraphScrapeTool()
    result = tool.run(
        website_url="https://www.example.com",
        user_prompt="Extract the main heading"
        website_url="https://example.com",
        method=ScrapeMethod.SMARTSCRAPER
    )
except ValueError as e:
    print(f"Configuration error: {e}")  # Handles invalid URLs or missing API keys
    print(f"Configuration error: {e}")  # Invalid parameters or missing API key
except RateLimitError as e:
    print(f"Rate limit exceeded: {e}")  # API rate limits exceeded
except ScrapegraphError as e:
    print(f"ScrapeGraph API error: {e}")  # General API errors
except RuntimeError as e:
    print(f"Scraping error: {e}")  # Handles API or network errors
    print(f"Scraping operation failed: {e}")  # Other runtime errors
```

## Arguments
- `website_url`: The URL of the website to scrape (required if not set during initialization)
- `user_prompt`: Custom instructions for content extraction (optional)
- `api_key`: Your Scrapegraph API key (required, can be set via SCRAPEGRAPH_API_KEY environment variable)

## Environment Variables
- `SCRAPEGRAPH_API_KEY`: Your Scrapegraph API key, you can obtain one [here](https://scrapegraphai.com)
- `SCRAPEGRAPH_API_KEY`: Your ScrapeGraph API key; you can obtain one [here](https://scrapegraphai.com)

## Rate Limiting
The Scrapegraph API has rate limits that vary based on your subscription plan. Consider the following best practices:
The ScrapeGraph API has rate limits that vary based on your subscription plan. Consider the following best practices:
- Implement appropriate delays between requests when processing multiple URLs
- Handle rate limit errors gracefully in your application
- Check your API plan limits on the Scrapegraph dashboard

## Error Handling
The tool may raise the following exceptions:
- `ValueError`: When API key is missing or URL format is invalid
- `RuntimeError`: When scraping operation fails (network issues, API errors)
- `RateLimitError`: When API rate limits are exceeded
- Check your API plan limits on the ScrapeGraph dashboard
- Use caching for frequently accessed content
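
One way to apply these practices is a small retry wrapper around `tool.run()`; a minimal sketch, assuming the `RateLimitError` exported by this tool and a simple exponential backoff:

```python
import time

from crewai_tools import RateLimitError, ScrapegraphScrapeTool

tool = ScrapegraphScrapeTool()


def run_with_backoff(max_retries=3, **kwargs):
    """Retry tool.run() with exponential backoff when the API rate limit is hit."""
    for attempt in range(max_retries):
        try:
            return tool.run(**kwargs)
        except RateLimitError:
            if attempt == max_retries - 1:
                raise
            time.sleep(2 ** attempt)  # wait 1s, 2s, 4s, ... before retrying


result = run_with_backoff(
    website_url="https://example.com",
    user_prompt="Extract the main heading",
)
```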

## Best Practices
1. Always validate URLs before making requests
2. Implement proper error handling as shown in examples
3. Consider caching results for frequently accessed pages
4. Monitor your API usage through the Scrapegraph dashboard

1. **Method Selection**: Choose the appropriate method for your use case (see the sketch after this list):
- Use `SmartScraper` for general content extraction
- Use `SearchScraper` for research across multiple sources
- Use `AgenticScraper` for sites requiring interaction
- Use `Crawl` for comprehensive site mapping
- Use `Scrape` for raw HTML when you need full control
- Use `Markdownify` for content formatting

2. **Schema Design**: When using `data_schema`, design clear, specific schemas for better extraction results

3. **Session Management**: Use `use_session=True` for `AgenticScraper` when you need to maintain state across interactions

4. **Performance**: Enable `cache_website=True` for crawling operations to improve performance

5. **Error Handling**: Always implement proper error handling as shown in examples

6. **Validation**: Validate URLs and parameters before making requests

7. **Monitoring**: Monitor your API usage through the ScrapeGraph dashboard
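
The method-selection guidance from item 1 can be encoded as a simple lookup; a hedged sketch (the mapping and use-case names are illustrative, not part of the tool's API):

```python
from crewai_tools import ScrapegraphScrapeTool, ScrapeMethod

tool = ScrapegraphScrapeTool()

# Illustrative mapping from use case to the suggested method (hypothetical helper).
METHOD_BY_USE_CASE = {
    "general_extraction": ScrapeMethod.SMARTSCRAPER,
    "multi_source_research": ScrapeMethod.SEARCHSCRAPER,
    "interactive_site": ScrapeMethod.AGENTICSCRAPER,
    "site_mapping": ScrapeMethod.CRAWL,
    "raw_html": ScrapeMethod.SCRAPE,
    "content_formatting": ScrapeMethod.MARKDOWNIFY,
}

result = tool.run(
    method=METHOD_BY_USE_CASE["multi_source_research"],
    user_prompt="Latest AI developments",
    num_results=5,
)
```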

## Examples

See `examples/scrapegraph_tool_examples.py` for complete working examples of all methods.

## API Reference

- **ScrapeMethod**: Enum of available scraping methods (SMARTSCRAPER, SEARCHSCRAPER, AGENTICSCRAPER, CRAWL, SCRAPE, MARKDOWNIFY)
- **ScrapegraphScrapeToolSchema**: Input validation schema for flexible URL usage
- **FixedScrapegraphScrapeToolSchema**: Schema for tools with fixed URLs
- **ScrapegraphError**: Base exception class for ScrapeGraph-related errors
- **RateLimitError**: Specialized exception for rate limiting scenarios
17 changes: 17 additions & 0 deletions crewai_tools/tools/scrapegraph_scrape_tool/__init__.py
@@ -0,0 +1,17 @@
from .scrapegraph_scrape_tool import (
    ScrapegraphScrapeTool,
    ScrapegraphScrapeToolSchema,
    FixedScrapegraphScrapeToolSchema,
    ScrapeMethod,
    ScrapegraphError,
    RateLimitError,
)

__all__ = [
"ScrapegraphScrapeTool",
"ScrapegraphScrapeToolSchema",
"FixedScrapegraphScrapeToolSchema",
"ScrapeMethod",
"ScrapegraphError",
"RateLimitError",
]