Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,6 @@ jobs:

- run: uv sync --only-dev
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also move the ${{ matrix.install.command }} here and make the test command uv run --no-sync ...?


- run: uv run mcp-run-python example --deps=numpy
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DouweM is this still needed now that mcp-run-python lives in another repo? I've already checked that this test is also run in that repo.

Copy link
Collaborator

@DouweM DouweM Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Kludex I'm fine removing it.

Note that we also test that mcp-run-python can be started here:

from pydantic_ai import Agent
from pydantic_ai.mcp import MCPServerStdio

# Describe an MCP server reached over stdio: the subprocess command is
# `uv run mcp-run-python stdio`, with a timeout of 10 passed to the server.
server = MCPServerStdio(  # (1)!
    'uv', args=['run', 'mcp-run-python', 'stdio'], timeout=10
)
# The agent is given the MCP server as one of its toolsets.
agent = Agent('openai:gpt-5', toolsets=[server])


async def main():
    """Run the agent on a single date-arithmetic prompt and print its output."""
    result = await agent.run('How many days between 2000-01-01 and 2025-03-18?')
    print(result.output)
    #> There are 9,208 days between January 1, 2000, and March 18, 2025.

That makes the test suite require deno. I actually think we should drop that entirely and really treat it as a separate package.


- name: cache HuggingFace models
uses: actions/cache@v4
with:
Expand Down Expand Up @@ -205,6 +203,7 @@ jobs:
CI: true
COVERAGE_PROCESS_START: ./pyproject.toml
RUN_LLAMA_CPP_TESTS: false
UV_FROZEN: "0"
steps:
- uses: actions/checkout@v4

Expand All @@ -220,9 +219,7 @@ jobs:

- run: mkdir .coverage

- run: uv sync --group dev

- run: uv run mcp-run-python example --deps=numpy
- run: uv sync --group dev --resolution lowest-direct --all-extras

- name: cache HuggingFace models
uses: actions/cache@v4
Expand All @@ -232,9 +229,7 @@ jobs:
restore-keys: |
hf-${{ runner.os }}-

- run: unset UV_FROZEN

- run: uv run --all-extras --resolution lowest-direct coverage run -m pytest --durations=100 -n auto --dist=loadgroup
- run: uv run --no-sync coverage run -m pytest --durations=100 -n auto --dist=loadgroup
env:
COVERAGE_FILE: .coverage/.coverage.${{matrix.python-version}}-lowest-versions

Expand Down
7 changes: 1 addition & 6 deletions pydantic_ai_slim/pydantic_ai/ui/_web/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,7 @@ async def index(request: Request) -> Response:
status_code=502,
)

return HTMLResponse(
content=content,
headers={
'Cache-Control': 'public, max-age=3600',
},
)
return HTMLResponse(content=content, headers={'Cache-Control': 'public, max-age=3600'})

app.router.add_route('/', index, methods=['GET'])
app.router.add_route('/{id}', index, methods=['GET'])
Expand Down
6 changes: 3 additions & 3 deletions pydantic_ai_slim/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ openrouter = ["openai>=2.8.0"]
mistral = ["mistralai>=1.9.10"]
bedrock = ["boto3>=1.40.14"]
huggingface = ["huggingface-hub[inference]>=0.33.5,<1.0.0"]
outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow>11.0.0", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
outlines-llamacpp = ["outlines[llamacpp]>=1.0.0, <1.3.0"]
outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; platform_system == 'Darwin' and platform_machine == 'arm64'"]
outlines-sglang = ["outlines[sglang]>=1.0.0, <1.3.0", "pillow"]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Inconsistent version specifier excludes Pillow 11.0.0

The pillow>11.0.0 constraint uses an exclusive lower bound (>) while all other version constraints in the file consistently use inclusive bounds (>=). In the same PR, vllm>=0.8.0 uses >=. Since Pillow has no version between 11.0.0 and 11.1.0, this effectively sets the minimum to 11.1.0, potentially excluding a valid version. If the intent was to require Pillow 11.0.0 or later, this appears to be a typo and >=11.0.0 would be the correct specifier.

Fix in Cursor Fix in Web

outlines-vllm-offline = ["vllm; python_version < '3.12' and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "outlines>=1.0.0, <1.3.0"]
outlines-sglang = ["outlines[sglang]>=1.0.0, <1.3.0", "pillow>11.0.0"]
outlines-vllm-offline = ["vllm>=0.8.0; python_version < '3.12' and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "outlines>=1.0.0, <1.3.0"]
# Tools
duckduckgo = ["ddgs>=9.0.0"]
tavily = ["tavily-python>=0.5.0"]
Expand Down
9 changes: 4 additions & 5 deletions tests/models/anthropic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from collections.abc import Callable
from functools import cache
from typing import TYPE_CHECKING

import pytest

Expand All @@ -18,7 +19,8 @@
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.providers.anthropic import AnthropicProvider

AnthropicModelFactory = Callable[..., AnthropicModel]
if TYPE_CHECKING:
AnthropicModelFactory = Callable[..., AnthropicModel]


# Model factory fixture for live API tests
Expand All @@ -27,10 +29,7 @@ def anthropic_model(anthropic_api_key: str) -> AnthropicModelFactory:
"""Factory to create Anthropic models with custom configuration."""

@cache
def _create_model(
model_name: str,
api_key: str | None = None,
) -> AnthropicModel:
def _create_model(model_name: str, api_key: str | None = None) -> AnthropicModel:
"""Create an AnthropicModel with the specified configuration.

Args:
Expand Down
8 changes: 4 additions & 4 deletions tests/models/anthropic/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from __future__ import annotations as _annotations

from collections.abc import Callable
from typing import Annotated
from typing import TYPE_CHECKING, Annotated

import httpx
import pytest
Expand All @@ -35,6 +35,9 @@

from ..test_anthropic import completion_message

if TYPE_CHECKING:
ANTHROPIC_MODEL_FIXTURE = Callable[..., AnthropicModel]

pytestmark = [
pytest.mark.skipif(not imports_successful(), reason='anthropic not installed'),
pytest.mark.anyio,
Expand Down Expand Up @@ -231,9 +234,6 @@ async def verify_headers(request: httpx.Request):
return verify_headers


ANTHROPIC_MODEL_FIXTURE = Callable[..., AnthropicModel]


# =============================================================================
# Supported Model Tests (claude-sonnet-4-5)
# =============================================================================
Expand Down
5 changes: 3 additions & 2 deletions tests/models/test_bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
from typing import Any

import pytest
from botocore.exceptions import ClientError
from inline_snapshot import snapshot
from mypy_boto3_bedrock_runtime.type_defs import MessageUnionTypeDef, SystemContentBlockTypeDef, ToolTypeDef
from typing_extensions import TypedDict

from pydantic_ai import (
Expand Down Expand Up @@ -49,6 +47,9 @@
from ..conftest import IsDatetime, IsInstance, IsStr, try_import

with try_import() as imports_successful:
from botocore.exceptions import ClientError
from mypy_boto3_bedrock_runtime.type_defs import MessageUnionTypeDef, SystemContentBlockTypeDef, ToolTypeDef

from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelName, BedrockModelSettings
from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
from pydantic_ai.providers.bedrock import BedrockProvider
Expand Down
10 changes: 5 additions & 5 deletions tests/models/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -4640,17 +4640,17 @@ def get_country() -> str:
'error_class,error_response,expected_status',
[
(
errors.ServerError,
'ServerError',
{'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}},
503,
),
(
errors.ClientError,
'ClientError',
{'error': {'code': 400, 'message': 'Invalid request parameters', 'status': 'INVALID_ARGUMENT'}},
400,
),
(
errors.ClientError,
'ClientError',
{'error': {'code': 429, 'message': 'Rate limit exceeded', 'status': 'RESOURCE_EXHAUSTED'}},
429,
),
Expand All @@ -4660,12 +4660,12 @@ async def test_google_api_errors_are_handled(
allow_model_requests: None,
google_provider: GoogleProvider,
mocker: MockerFixture,
error_class: type[errors.APIError],
error_class: str,
error_response: dict[str, Any],
expected_status: int,
):
model = GoogleModel('gemini-1.5-flash', provider=google_provider)
mocked_error = error_class(expected_status, error_response)
mocked_error = getattr(errors, error_class)(expected_status, error_response)
mocker.patch.object(model.client.aio.models, 'generate_content', side_effect=mocked_error)

agent = Agent(model=model)
Expand Down
47 changes: 24 additions & 23 deletions tests/models/test_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,10 @@
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from functools import cached_property
from typing import Any, Literal, cast
from typing import TYPE_CHECKING, Any, Literal, cast
from unittest.mock import Mock

import aiohttp
import pytest
from huggingface_hub import (
AsyncInferenceClient,
ChatCompletionInputMessage,
ChatCompletionOutput,
ChatCompletionOutputComplete,
ChatCompletionOutputFunctionDefinition,
ChatCompletionOutputMessage,
ChatCompletionOutputToolCall,
ChatCompletionOutputUsage,
ChatCompletionStreamOutput,
ChatCompletionStreamOutputChoice,
ChatCompletionStreamOutputDelta,
ChatCompletionStreamOutputDeltaToolCall,
ChatCompletionStreamOutputFunction,
ChatCompletionStreamOutputUsage,
)
from huggingface_hub.errors import HfHubHTTPError
from inline_snapshot import snapshot
from typing_extensions import TypedDict
Expand All @@ -50,8 +33,6 @@
VideoUrl,
)
from pydantic_ai.exceptions import ModelHTTPError
from pydantic_ai.models.huggingface import HuggingFaceModel
from pydantic_ai.providers.huggingface import HuggingFaceProvider
from pydantic_ai.result import RunUsage
from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
from pydantic_ai.settings import ModelSettings
Expand All @@ -62,10 +43,30 @@
from .mock_async_stream import MockAsyncStream

with try_import() as imports_successful:
pass
import aiohttp
from huggingface_hub import (
AsyncInferenceClient,
ChatCompletionInputMessage,
ChatCompletionOutput,
ChatCompletionOutputComplete,
ChatCompletionOutputFunctionDefinition,
ChatCompletionOutputMessage,
ChatCompletionOutputToolCall,
ChatCompletionOutputUsage,
ChatCompletionStreamOutput,
ChatCompletionStreamOutputChoice,
ChatCompletionStreamOutputDelta,
ChatCompletionStreamOutputDeltaToolCall,
ChatCompletionStreamOutputFunction,
ChatCompletionStreamOutputUsage,
)

from pydantic_ai.models.huggingface import HuggingFaceModel
from pydantic_ai.providers.huggingface import HuggingFaceProvider

MockChatCompletion = ChatCompletionOutput | Exception
MockStreamEvent = ChatCompletionStreamOutput | Exception
if TYPE_CHECKING:
MockChatCompletion = ChatCompletionOutput | Exception
MockStreamEvent = ChatCompletionStreamOutput | Exception

pytestmark = [
pytest.mark.skipif(not imports_successful(), reason='huggingface_hub not installed'),
Expand Down
32 changes: 15 additions & 17 deletions tests/models/test_model_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,22 @@ def vcr_config(): # pragma: lax no cover
}


_PROVIDER_TO_MODEL_NAMES = {
'anthropic': AnthropicModelName,
'bedrock': BedrockModelName,
'cohere': CohereModelName,
'deepseek': DeepSeekModelName,
'google-gla': GoogleModelName,
'google-vertex': GoogleModelName,
'grok': GrokModelName,
'groq': GroqModelName,
'huggingface': HuggingFaceModelName,
'mistral': MistralModelName,
'moonshotai': MoonshotAIModelName,
'openai': OpenAIModelName,
}


def test_known_model_names(): # pragma: lax no cover
# Coverage seems to be misbehaving..?
_PROVIDER_TO_MODEL_NAMES = {
'anthropic': AnthropicModelName,
'bedrock': BedrockModelName,
'cohere': CohereModelName,
'deepseek': DeepSeekModelName,
'google-gla': GoogleModelName,
'google-vertex': GoogleModelName,
'grok': GrokModelName,
'groq': GroqModelName,
'huggingface': HuggingFaceModelName,
'mistral': MistralModelName,
'moonshotai': MoonshotAIModelName,
'openai': OpenAIModelName,
}

def get_model_names(model_name_type: Any) -> Iterator[str]:
for arg in get_args(model_name_type):
if isinstance(arg, str):
Expand Down
8 changes: 4 additions & 4 deletions tests/test_ui_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@

from .conftest import try_import

with try_import() as starlette_import_successful:
with try_import() as imports_successful:
# Used on a test in this file.
import openai # pyright: ignore[reportUnusedImport] # noqa: F401
from starlette.applications import Starlette
from starlette.testclient import TestClient

from pydantic_ai.builtin_tools import WebSearchTool
from pydantic_ai.ui._web import create_web_app


pytestmark = [
pytest.mark.skipif(not starlette_import_successful(), reason='starlette not installed'),
]
pytestmark = [pytest.mark.skipif(not imports_successful(), reason='starlette not installed')]


def test_agent_to_web():
Expand Down
Loading