diff --git a/pydantic_ai_slim/pydantic_ai/_output.py b/pydantic_ai_slim/pydantic_ai/_output.py
index 123678d7b1..b48c25595e 100644
--- a/pydantic_ai_slim/pydantic_ai/_output.py
+++ b/pydantic_ai_slim/pydantic_ai/_output.py
@@ -891,11 +891,13 @@ def build(
         name = None
         description = None
         strict = None
+        examples = None
         if isinstance(output, ToolOutput):
             # do we need to error on conflicts here? (DavidM): If this is internal maybe doesn't matter, if public, use overloads
             name = output.name
             description = output.description
             strict = output.strict
+            examples = output.examples
             output = output.output
@@ -931,6 +933,7 @@ def build(
             parameters_json_schema=object_def.json_schema,
             strict=object_def.strict,
             outer_typed_dict_key=processor.outer_typed_dict_key,
+            examples=examples,
             kind='output',
         )
         processors[name] = processor
diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py
index b8d67d15d6..ad316ccf06 100644
--- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py
+++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py
@@ -1050,6 +1050,7 @@ def tool(
         strict: bool | None = None,
         sequential: bool = False,
         requires_approval: bool = False,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
     ) -> Callable[[ToolFuncContext[AgentDepsT, ToolParams]], ToolFuncContext[AgentDepsT, ToolParams]]: ...
@@ -1069,6 +1070,7 @@ def tool(
         strict: bool | None = None,
         sequential: bool = False,
         requires_approval: bool = False,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
     ) -> Any:
@@ -1119,6 +1121,8 @@ async def spam(ctx: RunContext[str], y: float) -> float:
             sequential: Whether the function requires a sequential/serial execution environment. Defaults to False.
             requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False.
                 See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info.
+            examples: Example inputs demonstrating correct tool usage. Defaults to None.
+                See [`ToolDefinition.examples`][pydantic_ai.tools.ToolDefinition.examples] for more info.
             metadata: Optional metadata for the tool. This is not sent to the model
                 but can be used for filtering and tool behavior customization.
             timeout: Timeout in seconds for tool execution. If the tool takes longer, a retry prompt is returned to the model.
                 Overrides the agent-level `tool_timeout` if set. Defaults to None (no timeout).
@@ -1141,6 +1145,7 @@ def tool_decorator(
                 strict=strict,
                 sequential=sequential,
                 requires_approval=requires_approval,
+                examples=examples,
                 metadata=metadata,
                 timeout=timeout,
             )
@@ -1166,6 +1171,7 @@ def tool_plain(
         strict: bool | None = None,
         sequential: bool = False,
         requires_approval: bool = False,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
     ) -> Callable[[ToolFuncPlain[ToolParams]], ToolFuncPlain[ToolParams]]: ...
@@ -1185,6 +1191,7 @@ def tool_plain(
         strict: bool | None = None,
         sequential: bool = False,
         requires_approval: bool = False,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
     ) -> Any:
@@ -1235,6 +1242,8 @@ async def spam(ctx: RunContext[str]) -> float:
             sequential: Whether the function requires a sequential/serial execution environment. Defaults to False.
             requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False.
                See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info.
+            examples: Example inputs demonstrating correct tool usage. Defaults to None.
+                See [`ToolDefinition.examples`][pydantic_ai.tools.ToolDefinition.examples] for more info.
             metadata: Optional metadata for the tool. This is not sent to the model
                 but can be used for filtering and tool behavior customization.
             timeout: Timeout in seconds for tool execution. If the tool takes longer, a retry prompt is returned to the model.
                 Overrides the agent-level `tool_timeout` if set. Defaults to None (no timeout).
@@ -1255,6 +1264,7 @@ def tool_decorator(func_: ToolFuncPlain[ToolParams]) -> ToolFuncPlain[ToolParams
                 strict=strict,
                 sequential=sequential,
                 requires_approval=requires_approval,
+                examples=examples,
                 metadata=metadata,
                 timeout=timeout,
             )
diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index fea4461f66..9bd480b705 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -67,6 +67,7 @@
     APIConnectionError,
     APIStatusError,
     AsyncAnthropicBedrock,
+    AsyncAnthropicVertex,
     AsyncStream,
     omit as OMIT,
 )
@@ -459,6 +460,14 @@ def _get_betas_and_extra_headers(
         if has_strict_tools or model_request_parameters.output_mode == 'native':
             betas.add('structured-outputs-2025-11-13')

+        # Check if any tools use input_examples (tool use examples feature)
+        has_input_examples = any('input_examples' in tool for tool in tools)
+        if has_input_examples:
+            if isinstance(self.client, (AsyncAnthropicBedrock, AsyncAnthropicVertex)):
+                betas.add('tool-examples-2025-10-29')
+            else:
+                betas.add('advanced-tool-use-2025-11-20')
+
         if beta_header := extra_headers.pop('anthropic-beta', None):
             betas.update({stripped_beta for beta in beta_header.split(',') if (stripped_beta := beta.strip())})
@@ -1125,6 +1134,8 @@ def _map_tool_definition(self, f: ToolDefinition) -> BetaToolParam:
         }
         if f.strict and self.profile.supports_json_schema_output:
             tool_param['strict'] = f.strict
+        if f.examples:
+            tool_param['input_examples'] = f.examples
         return tool_param

     @staticmethod
diff --git a/pydantic_ai_slim/pydantic_ai/output.py b/pydantic_ai_slim/pydantic_ai/output.py
index ae8d0fd39b..79a9da7641 100644
--- a/pydantic_ai_slim/pydantic_ai/output.py
+++ b/pydantic_ai_slim/pydantic_ai/output.py
@@ -115,6 +115,8 @@ class Vehicle(BaseModel):
     """The maximum number of retries for the tool."""
     strict: bool | None
     """Whether to use strict mode for the tool."""
+    examples: list[dict[str, Any]] | None
+    """Example inputs demonstrating correct tool usage."""

     def __init__(
         self,
@@ -124,12 +126,14 @@ def __init__(
         description: str | None = None,
         max_retries: int | None = None,
         strict: bool | None = None,
+        examples: list[dict[str, Any]] | None = None,
     ):
         self.output = type_
         self.name = name
         self.description = description
         self.max_retries = max_retries
         self.strict = strict
+        self.examples = examples


 @dataclass(init=False)
diff --git a/pydantic_ai_slim/pydantic_ai/tools.py b/pydantic_ai_slim/pydantic_ai/tools.py
index 900278ce44..0d51470554 100644
--- a/pydantic_ai_slim/pydantic_ai/tools.py
+++ b/pydantic_ai_slim/pydantic_ai/tools.py
@@ -272,6 +272,7 @@ class Tool(Generic[ToolAgentDepsT]):
     strict: bool | None
     sequential: bool
     requires_approval: bool
+    examples: list[dict[str, Any]] | None
     metadata: dict[str, Any] | None
     timeout: float | None
     function_schema: _function_schema.FunctionSchema
@@ -296,6 +297,7 @@ def __init__(
         strict: bool | None = None,
         sequential: bool = False,
         requires_approval: bool = False,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
         function_schema: _function_schema.FunctionSchema | None = None,
@@ -353,6 +355,8 @@ async def prep_my_tool(
             sequential: Whether the function requires a sequential/serial execution environment. Defaults to False.
             requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False.
                 See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info.
+            examples: Example inputs demonstrating correct tool usage. Defaults to None.
+                See [`ToolDefinition.examples`][pydantic_ai.tools.ToolDefinition.examples] for more info.
             metadata: Optional metadata for the tool. This is not sent to the model
                 but can be used for filtering and tool behavior customization.
             timeout: Timeout in seconds for tool execution. If the tool takes longer, a retry prompt is returned to the model.
                 Defaults to None (no timeout).
@@ -376,6 +380,7 @@ async def prep_my_tool(
         self.strict = strict
         self.sequential = sequential
         self.requires_approval = requires_approval
+        self.examples = examples
         self.metadata = metadata
         self.timeout = timeout
@@ -388,6 +393,7 @@ def from_schema(
         json_schema: JsonSchemaValue,
         takes_ctx: bool = False,
         sequential: bool = False,
+        examples: list[dict[str, Any]] | None = None,
     ) -> Self:
         """Creates a Pydantic tool from a function and a JSON schema.
@@ -402,6 +408,8 @@ def from_schema(
             takes_ctx: An optional boolean parameter indicating whether the function
                 accepts the context object as an argument.
             sequential: Whether the function requires a sequential/serial execution environment. Defaults to False.
+            examples: Example inputs demonstrating correct tool usage. Defaults to None.
+                See [`ToolDefinition.examples`][pydantic_ai.tools.ToolDefinition.examples] for more info.

         Returns:
             A Pydantic tool that calls the function
@@ -422,6 +430,7 @@ def from_schema(
             description=description,
             function_schema=function_schema,
             sequential=sequential,
+            examples=examples,
         )

     @property
@@ -434,6 +443,7 @@ def tool_def(self):
             sequential=self.sequential,
             metadata=self.metadata,
             timeout=self.timeout,
+            examples=self.examples,
             kind='unapproved' if self.requires_approval else 'function',
         )
@@ -514,6 +524,18 @@ class ToolDefinition:
     See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info.
     """

+    examples: list[dict[str, Any]] | None = None
+    """Example inputs demonstrating correct tool usage patterns.
+
+    Provide 1-5 realistic examples showing parameter conventions, optional field patterns,
+    nested structures, and API-specific conventions. Each example must validate against
+    the tool's `parameters_json_schema`.
+
+    Supported by:
+
+    * [Anthropic](https://platform.claude.com/docs/en/agents-and-tools/tool-use/implement-tool-use#providing-tool-use-examples)
+    """
+
     metadata: dict[str, Any] | None = None
     """Tool metadata that can be set by the toolset this tool came from.
     It is not sent to the model, but can be used for filtering and tool behavior customization.
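With the plumbing above, `examples` flows from the agent decorators through `Tool` into `ToolDefinition`. A minimal usage sketch of the new parameter — the tool, its parameters, and the example dicts below are hypothetical illustrations, not part of this diff:

```python
from pydantic_ai import Agent

agent = Agent('anthropic:claude-3-5-sonnet-latest')


# Each dict must validate against the JSON schema derived from the
# function signature; the docstring above recommends 1-5 realistic examples.
@agent.tool_plain(
    examples=[
        {'city': 'Paris', 'country': 'FR'},  # fully specified call
        {'city': 'Tokyo'},  # optional field omitted
    ]
)
def get_weather(city: str, country: str | None = None) -> str:
    """Look up the current weather for a city."""
    return f'Sunny in {city}'
```

On Anthropic models the list is forwarded as `input_examples` on the tool param and the matching beta header is added by `_get_betas_and_extra_headers`; per the `ToolDefinition.examples` docstring, other providers currently ignore the field.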
diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/function.py b/pydantic_ai_slim/pydantic_ai/toolsets/function.py
index 9655643f4b..a5b76b36cd 100644
--- a/pydantic_ai_slim/pydantic_ai/toolsets/function.py
+++ b/pydantic_ai_slim/pydantic_ai/toolsets/function.py
@@ -131,6 +131,7 @@ def tool(
         strict: bool | None = None,
         sequential: bool | None = None,
         requires_approval: bool | None = None,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
     ) -> Callable[[ToolFuncEither[AgentDepsT, ToolParams]], ToolFuncEither[AgentDepsT, ToolParams]]: ...
@@ -150,6 +151,7 @@ def tool(
         strict: bool | None = None,
         sequential: bool | None = None,
         requires_approval: bool | None = None,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
     ) -> Any:
@@ -206,6 +208,9 @@ async def spam(ctx: RunContext[str], y: float) -> float:
             requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False.
                 See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info.
                 If `None`, the default value is determined by the toolset.
+            examples: Example inputs demonstrating correct tool usage. Defaults to None.
+                See [`ToolDefinition.examples`][pydantic_ai.tools.ToolDefinition.examples] for more info.
+                If `None`, the default value is determined by the toolset.
             metadata: Optional metadata for the tool. This is not sent to the model
                 but can be used for filtering and tool behavior customization.
                 If `None`, the default value is determined by the toolset. If provided, it will be merged with the toolset's metadata.
             timeout: Timeout in seconds for tool execution. If the tool takes longer, a retry prompt is returned to the model.
@@ -229,6 +234,7 @@ def tool_decorator(
                 strict=strict,
                 sequential=sequential,
                 requires_approval=requires_approval,
+                examples=examples,
                 metadata=metadata,
                 timeout=timeout,
             )
@@ -250,6 +256,7 @@ def add_function(
         strict: bool | None = None,
         sequential: bool | None = None,
         requires_approval: bool | None = None,
+        examples: list[dict[str, Any]] | None = None,
         metadata: dict[str, Any] | None = None,
         timeout: float | None = None,
     ) -> None:
@@ -284,6 +291,9 @@ def add_function(
             requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False.
                 See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info.
                 If `None`, the default value is determined by the toolset.
+            examples: Example inputs demonstrating correct tool usage. Defaults to None.
+                See [`ToolDefinition.examples`][pydantic_ai.tools.ToolDefinition.examples] for more info.
+                If `None`, the default value is determined by the toolset.
             metadata: Optional metadata for the tool. This is not sent to the model
                 but can be used for filtering and tool behavior customization.
                 If `None`, the default value is determined by the toolset. If provided, it will be merged with the toolset's metadata.
             timeout: Timeout in seconds for tool execution. If the tool takes longer, a retry prompt is returned to the model.
@@ -315,6 +325,7 @@ def add_function(
             strict=strict,
             sequential=sequential,
             requires_approval=requires_approval,
+            examples=examples,
             metadata=metadata,
             timeout=timeout,
         )
diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py
index c7be2c3ce2..ffad33b1cf 100644
--- a/tests/models/test_anthropic.py
+++ b/tests/models/test_anthropic.py
@@ -8265,3 +8265,83 @@ async def test_anthropic_system_prompts_and_instructions_ordering():
     # Verify user message is in anthropic_messages
     assert len(anthropic_messages) == 1
     assert anthropic_messages[0]['role'] == 'user'
+
+
+async def test_anthropic_tool_definition_examples(allow_model_requests: None):
+    from pydantic_ai.tools import ToolDefinition
+
+    examples = [{'x': 1}]
+    tool_def = ToolDefinition(name='foo', description='bar', examples=examples)
+
+    # a dummy client
+    mock_client = MockAnthropic.create_mock(completion_message([], BetaUsage(input_tokens=0, output_tokens=0)))
+    m = AnthropicModel('claude-3-5-sonnet-latest', provider=AnthropicProvider(anthropic_client=mock_client))
+
+    mapped = m._map_tool_definition(tool_def)  # pyright: ignore[reportPrivateUsage]
+    assert mapped.get('input_examples') == examples
+
+
+async def test_anthropic_beta_headers_with_examples(allow_model_requests: None):
+    # Test standard client
+    c = completion_message([BetaTextBlock(text='hi', type='text')], BetaUsage(input_tokens=1, output_tokens=1))
+    mock_client = MockAnthropic.create_mock(c)
+    m = AnthropicModel('claude-3-5-sonnet-latest', provider=AnthropicProvider(anthropic_client=mock_client))
+    agent = Agent(m)
+
+    examples = [{'x': 1}]
+
+    @agent.tool_plain(examples=examples)
+    def my_tool(x: int) -> int:
+        return x
+
+    await agent.run('hi')
+
+    completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+    betas = completion_kwargs['betas']
+    assert 'advanced-tool-use-2025-11-20' in betas
+    assert 'tool-examples-2025-10-29' not in betas
+
+
+async def test_anthropic_bedrock_beta_headers_with_examples(allow_model_requests: None):
+    from unittest.mock import MagicMock
+
+    from anthropic import AsyncAnthropicBedrock
+
+    # Mock client to pass isinstance check
+    mock_client = MagicMock(spec=AsyncAnthropicBedrock)
+    mock_client.base_url = 'https://bedrock.amazonaws.com'
+
+    m = AnthropicModel('claude-3-5-sonnet-latest', provider=AnthropicProvider(anthropic_client=mock_client))
+
+    from pydantic_ai.models import ModelRequestParameters
+    from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+    tools = [{'name': 'foo', 'input_examples': [{'x': 1}]}]
+    params = ModelRequestParameters()
+    settings = AnthropicModelSettings()
+
+    betas, _ = m._get_betas_and_extra_headers(tools, params, settings)  # pyright: ignore[reportPrivateUsage,reportArgumentType]
+
+    assert 'tool-examples-2025-10-29' in betas
+    assert 'advanced-tool-use-2025-11-20' not in betas
+
+
+async def test_anthropic_vertex_beta_headers_with_examples(allow_model_requests: None):
+    from unittest.mock import MagicMock
+
+    from anthropic import AsyncAnthropicVertex
+
+    mock_client = MagicMock(spec=AsyncAnthropicVertex)
+    m = AnthropicModel('claude-3-5-sonnet-latest', provider=AnthropicProvider(anthropic_client=mock_client))
+
+    from pydantic_ai.models import ModelRequestParameters
+    from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+    tools = [{'name': 'foo', 'input_examples': [{'x': 1}]}]
+    params = ModelRequestParameters()
+    settings = AnthropicModelSettings()
+
+    betas, _ = m._get_betas_and_extra_headers(tools, params, settings)  # pyright: ignore[reportPrivateUsage,reportArgumentType]
+
+    assert 'tool-examples-2025-10-29' in betas
+    assert 'advanced-tool-use-2025-11-20' not in betas
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 0031f702cd..c74d54bd06 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -149,6 +149,7 @@ def test_docstring_google(docstring_format: Literal['google', 'auto']):
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -184,6 +185,7 @@ def test_docstring_sphinx(docstring_format: Literal['sphinx', 'auto']):
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -227,6 +229,7 @@ def test_docstring_numpy(docstring_format: Literal['numpy', 'auto']):
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -270,6 +273,7 @@ def my_tool(x: int) -> str:  # pragma: no cover
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -311,6 +315,7 @@ def my_tool(x: int) -> str:  # pragma: no cover
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -358,6 +363,7 @@ def my_tool(x: int) -> str:  # pragma: no cover
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -393,6 +399,7 @@ def test_only_returns_type():
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -419,6 +426,7 @@ def test_docstring_unknown():
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -463,6 +471,7 @@ def test_docstring_google_no_body(docstring_format: Literal['google', 'auto']):
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -500,6 +509,7 @@ def takes_just_model(model: Foo) -> str:
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -546,6 +556,7 @@ def takes_just_model(model: Foo, z: int) -> str:
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -912,6 +923,7 @@ def test_suppress_griffe_logging(caplog: LogCaptureFixture):
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -985,6 +997,7 @@ def my_tool_plain(*, a: int = 1, b: int) -> int:
             'strict': None,
             'kind': 'function',
             'sequential': False,
+            'examples': None,
             'metadata': None,
             'timeout': None,
         },
@@ -1001,6 +1014,7 @@ def my_tool_plain(*, a: int = 1, b: int) -> int:
             'strict': None,
             'kind': 'function',
             'sequential': False,
+            'examples': None,
             'metadata': None,
             'timeout': None,
         },
@@ -1090,6 +1104,7 @@ def my_tool(x: Annotated[str | None, WithJsonSchema({'type': 'string'})] = None,
             'strict': None,
             'kind': 'function',
             'sequential': False,
+            'examples': None,
             'metadata': None,
             'timeout': None,
         },
@@ -1104,6 +1119,7 @@ def my_tool(x: Annotated[str | None, WithJsonSchema({'type': 'string'})] = None,
             'strict': None,
             'kind': 'function',
             'sequential': False,
+            'examples': None,
             'metadata': None,
             'timeout': None,
         },
@@ -1142,6 +1158,7 @@ def get_score(data: Data) -> int: ...  # pragma: no branch
         'strict': None,
         'kind': 'function',
         'sequential': False,
+        'examples': None,
         'metadata': None,
         'timeout': None,
     }
@@ -2757,3 +2774,59 @@ def test_agent_tool_timeout_passed_to_toolset():

     # The agent's tool_timeout should be passed to the toolset as timeout
     assert agent._function_toolset.timeout == 30.0
+
+
+def test_tool_examples_init():
+    def my_tool(x: int) -> int:
+        return x
+
+    examples = [{'x': 1}]
+    tool = Tool(my_tool, examples=examples)
+    assert tool.examples == examples
+    assert tool.tool_def.examples == examples
+
+
+def test_tool_from_schema_examples():
+    def my_tool(x: int) -> int:
+        return x
+
+    examples = [{'x': 1}]
+    tool = Tool.from_schema(
+        my_tool,
+        name='my_tool',
+        description='desc',
+        json_schema={'type': 'object', 'properties': {'x': {'type': 'integer'}}},
+        examples=examples,
+    )
+    assert tool.examples == examples
+    assert tool.tool_def.examples == examples
+
+
+def test_agent_tool_decorators_examples():
+    agent = Agent('test')
+    examples = [{'x': 1}]
+
+    @agent.tool(examples=examples)
+    def tool_ctx(ctx: RunContext[None], x: int) -> int:
+        return x
+
+    @agent.tool_plain(examples=examples)
+    def tool_plain(x: int) -> int:
+        return x
+
+    assert agent._function_toolset.tools['tool_ctx'].examples == examples
+    assert agent._function_toolset.tools['tool_plain'].examples == examples
+
+
+def test_tool_output_examples():
+    from pydantic_ai.output import ToolOutput
+
+    examples = [{'x': 1}]
+    tool_output = ToolOutput(int, name='foo', examples=examples)
+    assert tool_output.examples == examples
+
+    agent = Agent('test', output_type=tool_output)
+    # Access the output toolset to verify definition
+    assert agent._output_toolset is not None
+    tool_def = agent._output_toolset._tool_defs[0]
+    assert tool_def.examples == examples
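For the output-tool path exercised by `test_tool_output_examples`, here is a sketch of how `examples` might be attached to a structured output via `ToolOutput`; the `CityInfo` model and the example values are hypothetical:

```python
from pydantic import BaseModel

from pydantic_ai import Agent
from pydantic_ai.output import ToolOutput


class CityInfo(BaseModel):  # hypothetical output model
    city: str
    population: int


# ToolOutput.examples rides along on the output tool's ToolDefinition,
# exactly as it does for function tools (see the _output.py change above);
# each dict should validate against CityInfo's JSON schema.
agent = Agent(
    'anthropic:claude-3-5-sonnet-latest',
    output_type=ToolOutput(
        CityInfo,
        name='city_info',
        examples=[{'city': 'Berlin', 'population': 3_600_000}],
    ),
)
```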