diff --git a/docs/api/common_tools.md b/docs/api/common_tools.md index 4e5efc690e..6ffa4928cf 100644 --- a/docs/api/common_tools.md +++ b/docs/api/common_tools.md @@ -2,4 +2,6 @@ ::: pydantic_ai.common_tools.duckduckgo +::: pydantic_ai.common_tools.exa + ::: pydantic_ai.common_tools.tavily diff --git a/docs/common-tools.md b/docs/common-tools.md index 78baaaad02..620e8d08ea 100644 --- a/docs/common-tools.md +++ b/docs/common-tools.md @@ -138,3 +138,85 @@ Here are some of the top recent news articles related to GenAI: Feel free to click on the links to dive deeper into each story! """ ``` + +## Exa Search Tool + +!!! info + Exa is a paid service with free credits to explore their product. + + You need to [sign up for an account](https://dashboard.exa.ai) and get an API key to use the Exa tools. + +Exa is a neural search engine that finds high-quality, relevant results across billions of web pages. +It provides several tools including web search, finding similar pages, content retrieval, and AI-powered answers. + +### Installation + +To use Exa tools, you need to install [`pydantic-ai-slim`](install.md#slim-install) with the `exa` optional group: + +```bash +pip/uv-add "pydantic-ai-slim[exa]" +``` + +### Usage + +You can use Exa tools individually or as a toolset. 
The following tools are available: + +- [`exa_search_tool`][pydantic_ai.common_tools.exa.exa_search_tool]: Search the web with various search types (auto, keyword, neural, fast, deep) +- [`exa_find_similar_tool`][pydantic_ai.common_tools.exa.exa_find_similar_tool]: Find pages similar to a given URL +- [`exa_get_contents_tool`][pydantic_ai.common_tools.exa.exa_get_contents_tool]: Get full text content from URLs +- [`exa_answer_tool`][pydantic_ai.common_tools.exa.exa_answer_tool]: Get AI-powered answers with citations + +#### Using Individual Tools + +```py {title="exa_search.py" test="skip"} +import os + +from pydantic_ai import Agent +from pydantic_ai.common_tools.exa import exa_search_tool + +api_key = os.getenv('EXA_API_KEY') +assert api_key is not None + +agent = Agent( + 'openai:gpt-4o', + tools=[exa_search_tool(api_key, num_results=5, max_characters=1000)], + system_prompt='Search the web for information using Exa.', +) + +result = agent.run_sync('What are the latest developments in quantum computing?') +print(result.output) +``` + +#### Using ExaToolset + +For better efficiency when using multiple Exa tools, use [`ExaToolset`][pydantic_ai.common_tools.exa.ExaToolset] +which shares a single API client across all tools. 
You can configure which tools to include: + +```py {title="exa_toolset.py" test="skip"} +import os + +from pydantic_ai import Agent +from pydantic_ai.common_tools.exa import ExaToolset + +api_key = os.getenv('EXA_API_KEY') +assert api_key is not None + +toolset = ExaToolset( + api_key, + num_results=5, + max_characters=1000, # Limit text content to control token usage + include_search=True, # Include the search tool (default: True) + include_find_similar=True, # Include the find_similar tool (default: True) + include_get_contents=False, # Exclude the get_contents tool + include_answer=True, # Include the answer tool (default: True) +) + +agent = Agent( + 'openai:gpt-4o', + toolsets=[toolset], + system_prompt='You have access to Exa search tools to find information on the web.', +) + +result = agent.run_sync('Find recent AI research papers and summarize the key findings.') +print(result.output) +``` diff --git a/docs/install.md b/docs/install.md index 77ff4e56c1..43bceed1b7 100644 --- a/docs/install.md +++ b/docs/install.md @@ -58,6 +58,7 @@ pip/uv-add "pydantic-ai-slim[openai]" * `outlines-vllm-offline` - installs [Outlines Model](models/outlines.md) dependencies `outlines` [PyPI ↗](https://pypi.org/project/outlines){:target="_blank"} and `vllm` [PyPI ↗](https://pypi.org/project/vllm){:target="_blank"} * `duckduckgo` - installs [DuckDuckGo Search Tool](common-tools.md#duckduckgo-search-tool) dependency `ddgs` [PyPI ↗](https://pypi.org/project/ddgs){:target="_blank"} * `tavily` - installs [Tavily Search Tool](common-tools.md#tavily-search-tool) dependency `tavily-python` [PyPI ↗](https://pypi.org/project/tavily-python){:target="_blank"} +* `exa` - installs [Exa Search Tool](common-tools.md#exa-search-tool) dependency `exa-py` [PyPI ↗](https://pypi.org/project/exa-py){:target="_blank"} * `cli` - installs [CLI](cli.md) dependencies `rich` [PyPI ↗](https://pypi.org/project/rich){:target="_blank"}, `prompt-toolkit` [PyPI 
"""Exa tools for Pydantic AI agents.

Provides web search, content retrieval, and AI-powered answer capabilities
using the Exa API, a neural search engine that finds high-quality, relevant
results across billions of web pages.
"""

from dataclasses import dataclass
from typing import Literal, overload

from typing_extensions import Any, TypedDict

from pydantic_ai import FunctionToolset
from pydantic_ai.tools import Tool

try:
    from exa_py import AsyncExa
except ImportError as _import_error:
    raise ImportError(
        'Please install `exa-py` to use the Exa tools, '
        'you can use the `exa` optional group — `pip install "pydantic-ai-slim[exa]"`'
    ) from _import_error

__all__ = (
    'ExaToolset',
    'exa_search_tool',
    'exa_find_similar_tool',
    'exa_get_contents_tool',
    'exa_answer_tool',
)


class ExaSearchResult(TypedDict):
    """An Exa search result with content.

    See [Exa Search API documentation](https://docs.exa.ai/reference/search)
    for more information.
    """

    title: str
    """The title of the search result."""
    url: str
    """The URL of the search result."""
    published_date: str | None
    """The published date of the content, if available."""
    author: str | None
    """The author of the content, if available."""
    text: str
    """The text content of the search result."""


class ExaAnswerResult(TypedDict):
    """An Exa answer result with citations.

    See [Exa Answer API documentation](https://docs.exa.ai/reference/answer)
    for more information.
    """

    answer: str
    """The AI-generated answer to the query."""
    citations: list[dict[str, Any]]
    """Citations supporting the answer."""


class ExaContentResult(TypedDict):
    """Content retrieved from a URL.

    See [Exa Contents API documentation](https://docs.exa.ai/reference/get-contents)
    for more information.
    """

    url: str
    """The URL of the content."""
    title: str
    """The title of the page."""
    text: str
    """The text content of the page."""
    author: str | None
    """The author of the content, if available."""
    published_date: str | None
    """The published date of the content, if available."""


def _resolve_client(api_key: str | None, client: AsyncExa | None) -> AsyncExa:
    """Return `client` if given, otherwise build a new `AsyncExa` from `api_key`.

    Raises:
        ValueError: If neither `api_key` nor `client` is provided.
    """
    if client is not None:
        return client
    if api_key is None:
        raise ValueError('Either api_key or client must be provided')
    return AsyncExa(api_key=api_key)


def _text_option(max_characters: int | None) -> bool | dict[str, int]:
    """Build the `text` contents option sent to Exa.

    A falsy `max_characters` (`None` or `0`) requests the text without a limit (`True`);
    any other value is forwarded as the `maxCharacters` cap.
    """
    return {'maxCharacters': max_characters} if max_characters else True


def _to_search_result(result: Any) -> ExaSearchResult:
    """Convert one Exa result object into an `ExaSearchResult`.

    Missing `title`/`text` values are normalised to empty strings so the
    returned dict always satisfies the `str` fields of the TypedDict.
    """
    return ExaSearchResult(
        title=result.title or '',
        url=result.url,
        published_date=result.published_date,
        author=result.author,
        text=result.text or '',
    )


@dataclass
class ExaSearchTool:
    """The Exa search tool."""

    client: AsyncExa
    """The Exa async client."""

    num_results: int
    """The number of results to return."""

    max_characters: int | None
    """Maximum characters of text content per result, or None for no limit."""

    async def __call__(
        self,
        query: str,
        search_type: Literal['auto', 'keyword', 'neural', 'fast', 'deep'] = 'auto',
    ) -> list[ExaSearchResult]:
        """Searches Exa for the given query and returns the results with content.

        Args:
            query: The search query to execute with Exa.
            search_type: The type of search to perform. 'auto' automatically chooses
                the best search type, 'keyword' for exact matches, 'neural' for
                semantic search, 'fast' for speed-optimized search, 'deep' for
                comprehensive multi-query search.

        Returns:
            The search results with text content.
        """
        response = await self.client.search(  # pyright: ignore[reportUnknownMemberType]
            query,
            num_results=self.num_results,
            type=search_type,
            contents={'text': _text_option(self.max_characters)},
        )
        return [_to_search_result(result) for result in response.results]


@dataclass
class ExaFindSimilarTool:
    """The Exa find similar tool."""

    client: AsyncExa
    """The Exa async client."""

    num_results: int
    """The number of results to return."""

    max_characters: int | None = None
    """Maximum characters of text content per result, or None for no limit."""

    async def __call__(
        self,
        url: str,
        exclude_source_domain: bool = True,
    ) -> list[ExaSearchResult]:
        """Finds pages similar to the given URL and returns them with content.

        Args:
            url: The URL to find similar pages for.
            exclude_source_domain: Whether to exclude results from the same domain
                as the input URL. Defaults to True.

        Returns:
            Similar pages with text content.
        """
        response = await self.client.find_similar(  # pyright: ignore[reportUnknownMemberType]
            url,
            num_results=self.num_results,
            exclude_source_domain=exclude_source_domain,
            # Same text option as the search tool, so one limit governs all tools.
            contents={'text': _text_option(self.max_characters)},
        )
        return [_to_search_result(result) for result in response.results]


@dataclass
class ExaGetContentsTool:
    """The Exa get contents tool."""

    client: AsyncExa
    """The Exa async client."""

    max_characters: int | None = None
    """Maximum characters of text content per URL, or None for no limit."""

    async def __call__(
        self,
        urls: list[str],
    ) -> list[ExaContentResult]:
        """Gets the content of the specified URLs.

        Args:
            urls: A list of URLs to get content for.

        Returns:
            The content of each URL.
        """
        response = await self.client.get_contents(  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
            urls,
            text=_text_option(self.max_characters),
        )

        return [
            ExaContentResult(
                url=result.url,  # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType]
                title=result.title or '',  # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType]
                text=result.text or '',  # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType]
                author=result.author,  # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType]
                published_date=result.published_date,  # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType]
            )
            for result in response.results  # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType]
        ]


@dataclass
class ExaAnswerTool:
    """The Exa answer tool."""

    client: AsyncExa
    """The Exa async client."""

    async def __call__(
        self,
        query: str,
    ) -> ExaAnswerResult:
        """Generates an AI-powered answer to the query with citations.

        Args:
            query: The question to answer.

        Returns:
            An answer with supporting citations from web sources.
        """
        response = await self.client.answer(query, text=True)

        return ExaAnswerResult(
            answer=response.answer,  # pyright: ignore[reportUnknownMemberType,reportArgumentType,reportAttributeAccessIssue]
            citations=[
                {
                    'url': citation.url,  # pyright: ignore[reportUnknownMemberType]
                    'title': citation.title or '',  # pyright: ignore[reportUnknownMemberType]
                    'text': citation.text or '',  # pyright: ignore[reportUnknownMemberType]
                }
                for citation in response.citations  # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType,reportAttributeAccessIssue]
            ],
        )


@overload
def exa_search_tool(
    api_key: str,
    *,
    num_results: int = 5,
    max_characters: int | None = None,
) -> Tool[Any]: ...


@overload
def exa_search_tool(
    *,
    client: AsyncExa,
    num_results: int = 5,
    max_characters: int | None = None,
) -> Tool[Any]: ...


def exa_search_tool(
    api_key: str | None = None,
    *,
    client: AsyncExa | None = None,
    num_results: int = 5,
    max_characters: int | None = None,
) -> Tool[Any]:
    """Creates an Exa search tool.

    Args:
        api_key: The Exa API key. Required if `client` is not provided.

            You can get one by signing up at [https://dashboard.exa.ai](https://dashboard.exa.ai).
        client: An existing AsyncExa client. If provided, `api_key` is ignored.
            This is useful for sharing a client across multiple tools.
        num_results: The number of results to return. Defaults to 5.
        max_characters: Maximum characters of text content per result. Use this to limit
            token usage. Defaults to None (no limit).

    Raises:
        ValueError: If neither `api_key` nor `client` is provided.
    """
    return Tool[Any](
        ExaSearchTool(
            client=_resolve_client(api_key, client),
            num_results=num_results,
            max_characters=max_characters,
        ).__call__,
        name='exa_search',
        description='Searches Exa for the given query and returns the results with content. Exa is a neural search engine that finds high-quality, relevant results.',
    )


@overload
def exa_find_similar_tool(
    api_key: str,
    *,
    num_results: int = 5,
    max_characters: int | None = None,
) -> Tool[Any]: ...


@overload
def exa_find_similar_tool(
    *,
    client: AsyncExa,
    num_results: int = 5,
    max_characters: int | None = None,
) -> Tool[Any]: ...


def exa_find_similar_tool(
    api_key: str | None = None,
    *,
    client: AsyncExa | None = None,
    num_results: int = 5,
    max_characters: int | None = None,
) -> Tool[Any]:
    """Creates an Exa find similar tool.

    Args:
        api_key: The Exa API key. Required if `client` is not provided.

            You can get one by signing up at [https://dashboard.exa.ai](https://dashboard.exa.ai).
        client: An existing AsyncExa client. If provided, `api_key` is ignored.
            This is useful for sharing a client across multiple tools.
        num_results: The number of similar results to return. Defaults to 5.
        max_characters: Maximum characters of text content per result. Use this to limit
            token usage. Defaults to None (no limit).

    Raises:
        ValueError: If neither `api_key` nor `client` is provided.
    """
    return Tool[Any](
        ExaFindSimilarTool(
            client=_resolve_client(api_key, client),
            num_results=num_results,
            max_characters=max_characters,
        ).__call__,
        name='exa_find_similar',
        description='Finds web pages similar to a given URL. Useful for discovering related content, competitors, or alternative sources.',
    )


@overload
def exa_get_contents_tool(
    api_key: str,
    *,
    max_characters: int | None = None,
) -> Tool[Any]: ...


@overload
def exa_get_contents_tool(
    *,
    client: AsyncExa,
    max_characters: int | None = None,
) -> Tool[Any]: ...


def exa_get_contents_tool(
    api_key: str | None = None,
    *,
    client: AsyncExa | None = None,
    max_characters: int | None = None,
) -> Tool[Any]:
    """Creates an Exa get contents tool.

    Args:
        api_key: The Exa API key. Required if `client` is not provided.

            You can get one by signing up at [https://dashboard.exa.ai](https://dashboard.exa.ai).
        client: An existing AsyncExa client. If provided, `api_key` is ignored.
            This is useful for sharing a client across multiple tools.
        max_characters: Maximum characters of text content per URL. Use this to limit
            token usage. Defaults to None (no limit).

    Raises:
        ValueError: If neither `api_key` nor `client` is provided.
    """
    return Tool[Any](
        ExaGetContentsTool(
            client=_resolve_client(api_key, client),
            max_characters=max_characters,
        ).__call__,
        name='exa_get_contents',
        description='Gets the full text content of specified URLs. Useful for reading articles, documentation, or any web page when you have the exact URL.',
    )


@overload
def exa_answer_tool(api_key: str) -> Tool[Any]: ...


@overload
def exa_answer_tool(*, client: AsyncExa) -> Tool[Any]: ...


def exa_answer_tool(
    api_key: str | None = None,
    *,
    client: AsyncExa | None = None,
) -> Tool[Any]:
    """Creates an Exa answer tool.

    Args:
        api_key: The Exa API key. Required if `client` is not provided.

            You can get one by signing up at [https://dashboard.exa.ai](https://dashboard.exa.ai).
        client: An existing AsyncExa client. If provided, `api_key` is ignored.
            This is useful for sharing a client across multiple tools.

    Raises:
        ValueError: If neither `api_key` nor `client` is provided.
    """
    return Tool[Any](
        ExaAnswerTool(client=_resolve_client(api_key, client)).__call__,
        name='exa_answer',
        description='Generates an AI-powered answer to a question with citations from web sources. Returns a comprehensive answer backed by real sources.',
    )


class ExaToolset(FunctionToolset):
    """A toolset that provides Exa search tools with a shared client.

    This is more efficient than creating individual tools when using multiple
    Exa tools, as it shares a single API client across all tools.

    Example:
    ```python
    from pydantic_ai import Agent
    from pydantic_ai.common_tools.exa import ExaToolset

    toolset = ExaToolset(api_key='your-api-key')
    agent = Agent('openai:gpt-4o', toolsets=[toolset])
    ```
    """

    @overload
    def __init__(
        self,
        api_key: str,
        *,
        num_results: int = 5,
        max_characters: int | None = None,
        include_search: bool = True,
        include_find_similar: bool = True,
        include_get_contents: bool = True,
        include_answer: bool = True,
        id: str | None = None,
    ) -> None: ...

    @overload
    def __init__(
        self,
        *,
        client: AsyncExa,
        num_results: int = 5,
        max_characters: int | None = None,
        include_search: bool = True,
        include_find_similar: bool = True,
        include_get_contents: bool = True,
        include_answer: bool = True,
        id: str | None = None,
    ) -> None: ...

    def __init__(
        self,
        api_key: str | None = None,
        *,
        client: AsyncExa | None = None,
        num_results: int = 5,
        max_characters: int | None = None,
        include_search: bool = True,
        include_find_similar: bool = True,
        include_get_contents: bool = True,
        include_answer: bool = True,
        id: str | None = None,
    ):
        """Creates an Exa toolset with a shared client.

        Args:
            api_key: The Exa API key. Required if `client` is not provided.

                You can get one by signing up at [https://dashboard.exa.ai](https://dashboard.exa.ai).
            client: An existing AsyncExa client. If provided, `api_key` is ignored.
            num_results: The number of results to return for search and find_similar. Defaults to 5.
            max_characters: Maximum characters of text content per result, applied to the
                search, find_similar and get_contents tools. Use this to limit
                token usage. Defaults to None (no limit).
            include_search: Whether to include the search tool. Defaults to True.
            include_find_similar: Whether to include the find_similar tool. Defaults to True.
            include_get_contents: Whether to include the get_contents tool. Defaults to True.
            include_answer: Whether to include the answer tool. Defaults to True.
            id: Optional ID for the toolset, used for durable execution environments.

        Raises:
            ValueError: If neither `api_key` nor `client` is provided.
        """
        # One client instance is handed to every tool built below.
        shared_client = _resolve_client(api_key, client)
        tools: list[Tool[Any]] = []

        if include_search:
            tools.append(
                exa_search_tool(client=shared_client, num_results=num_results, max_characters=max_characters)
            )

        if include_find_similar:
            tools.append(
                exa_find_similar_tool(client=shared_client, num_results=num_results, max_characters=max_characters)
            )

        if include_get_contents:
            tools.append(exa_get_contents_tool(client=shared_client, max_characters=max_characters))

        if include_answer:
            tools.append(exa_answer_tool(client=shared_client))

        super().__init__(tools, id=id)
"https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = "2024-12-21T20:09:44.175Z" }, ] +[[package]] +name = "exa-py" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "openai" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/7f/38cda469587b2c69645d3ccc2dda47a5c1213cc98e9090271b462704496e/exa_py-2.0.1.tar.gz", hash = "sha256:aaae32b6356ed855b4decee3fcb8e71272439c4614a20419612c1674c8c1f648", size = 43553, upload-time = "2025-11-21T15:54:51.306Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/da/70ff6161914988ec756e6b2a57e23d28247cc825bfdbdcddf0bb967e87d0/exa_py-2.0.1-py3-none-any.whl", hash = "sha256:e2773f16dc2d70baad284139d58a292ac911307c6898ecd1dbb413035db0c950", size = 56043, upload-time = "2025-11-21T15:54:50.2Z" }, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -5414,6 +5428,7 @@ dev = [ { name = "diff-cover" }, { name = "dirty-equals" }, { name = "duckduckgo-search" }, + { name = "exa-py" }, { name = "genai-prices" }, { name = "inline-snapshot" }, { name = "mcp-run-python" }, @@ -5473,6 +5488,7 @@ dev = [ { name = "diff-cover", specifier = ">=9.2.0" }, { name = "dirty-equals", specifier = ">=0.9.0" }, { name = "duckduckgo-search", specifier = ">=7.0.0" }, + { name = "exa-py", specifier = ">=2.0.0" }, { name = "genai-prices", specifier = ">=0.0.28" }, { name = "inline-snapshot", specifier = ">=0.19.3" }, { name = "mcp-run-python", specifier = ">=0.0.20" }, @@ -5593,6 +5609,9 @@ duckduckgo = [ evals = [ { name = "pydantic-evals" }, ] +exa = [ + { name = "exa-py" }, +] fastmcp = [ { name = "fastmcp" }, ] @@ -5679,6 +5698,7 @@ requires-dist = [ { name = "cohere", marker = "sys_platform != 'emscripten' 
and extra == 'cohere'", specifier = ">=5.18.0" }, { name = "dbos", marker = "extra == 'dbos'", specifier = ">=1.14.0" }, { name = "ddgs", marker = "extra == 'duckduckgo'", specifier = ">=9.0.0" }, + { name = "exa-py", marker = "extra == 'exa'", specifier = ">=2.0.0" }, { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "fasta2a", marker = "extra == 'a2a'", specifier = ">=0.4.1" }, { name = "fastmcp", marker = "extra == 'fastmcp'", specifier = ">=2.12.0" }, @@ -5724,7 +5744,7 @@ requires-dist = [ { name = "uvicorn", marker = "extra == 'web'", specifier = ">=0.38.0" }, { name = "vllm", marker = "(python_full_version < '3.12' and platform_machine != 'x86_64' and extra == 'outlines-vllm-offline') or (python_full_version < '3.12' and sys_platform != 'darwin' and extra == 'outlines-vllm-offline')" }, ] -provides-extras = ["a2a", "ag-ui", "anthropic", "bedrock", "cli", "cohere", "dbos", "duckduckgo", "evals", "fastmcp", "google", "groq", "huggingface", "logfire", "mcp", "mistral", "openai", "openrouter", "outlines-llamacpp", "outlines-mlxlm", "outlines-sglang", "outlines-transformers", "outlines-vllm-offline", "prefect", "retries", "tavily", "temporal", "ui", "vertexai", "web"] +provides-extras = ["a2a", "ag-ui", "anthropic", "bedrock", "cli", "cohere", "dbos", "duckduckgo", "evals", "exa", "fastmcp", "google", "groq", "huggingface", "logfire", "mcp", "mistral", "openai", "openrouter", "outlines-llamacpp", "outlines-mlxlm", "outlines-sglang", "outlines-transformers", "outlines-vllm-offline", "prefect", "retries", "tavily", "temporal", "ui", "vertexai", "web"] [[package]] name = "pydantic-core"