diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f38236de9e..df7cf3f1c6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -169,9 +169,8 @@ jobs:
 
       - run: mkdir .coverage
 
-      - run: uv sync --only-dev
-
-      - run: uv run mcp-run-python example --deps=numpy
+      # We install the `dev` group on its own first: passing `--package` together with `--group dev` would try to install the `dev` group from that package instead.
+      - run: uv sync --group dev
 
       - name: cache HuggingFace models
         uses: actions/cache@v4
@@ -195,16 +194,16 @@ jobs:
 
   test-lowest-versions:
     name: test on ${{ matrix.python-version }} (lowest-versions)
    runs-on: ubuntu-latest
-    timeout-minutes: 20
+    timeout-minutes: 35
     strategy:
       fail-fast: false
       matrix:
-        # TODO(Marcelo): Enable 3.11 again.
-        python-version: ["3.10", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
     env:
       CI: true
       COVERAGE_PROCESS_START: ./pyproject.toml
       RUN_LLAMA_CPP_TESTS: false
+      UV_FROZEN: "0"
     steps:
       - uses: actions/checkout@v4
@@ -220,9 +219,7 @@ jobs:
 
       - run: mkdir .coverage
 
-      - run: uv sync --group dev
-
-      - run: uv run mcp-run-python example --deps=numpy
+      - run: uv sync --group dev --resolution lowest-direct --all-extras
 
       - name: cache HuggingFace models
         uses: actions/cache@v4
@@ -232,9 +229,7 @@ jobs:
           restore-keys: |
             hf-${{ runner.os }}-
 
-      - run: unset UV_FROZEN
-
-      - run: uv run --all-extras --resolution lowest-direct coverage run -m pytest --durations=100 -n auto --dist=loadgroup
+      - run: uv run --no-sync coverage run -m pytest --durations=100 -n auto --dist=loadgroup
         env:
           COVERAGE_FILE: .coverage/.coverage.${{matrix.python-version}}-lowest-versions
 
diff --git a/pydantic_ai_slim/pydantic_ai/ui/_web/app.py b/pydantic_ai_slim/pydantic_ai/ui/_web/app.py
index cb751388f6..ba1a40e593 100644
--- a/pydantic_ai_slim/pydantic_ai/ui/_web/app.py
+++ b/pydantic_ai_slim/pydantic_ai/ui/_web/app.py
@@ -121,12 +121,7 @@ async def index(request: Request) -> Response:
                 status_code=502,
             )
 
-        return HTMLResponse(
-            content=content,
-            headers={
-                'Cache-Control': 'public, max-age=3600',
-            },
-        )
+        return HTMLResponse(content=content, headers={'Cache-Control': 'public, max-age=3600'})
 
     app.router.add_route('/', index, methods=['GET'])
     app.router.add_route('/{id}', index, methods=['GET'])
diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml
index 8d196a813b..1d9c23c60e 100644
--- a/pydantic_ai_slim/pyproject.toml
+++ b/pydantic_ai_slim/pyproject.toml
@@ -57,7 +57,7 @@ dependencies = [
     "httpx>=0.27",
     "pydantic>=2.10",
     "pydantic-graph=={{ version }}",
-    "exceptiongroup; python_version < '3.11'",
+    "exceptiongroup>=1.2.2; python_version < '3.11'",
     "opentelemetry-api>=1.28.0",
     "typing-inspection>=0.4.0",
     "genai-prices>=0.0.40",
@@ -77,11 +77,11 @@ openrouter = ["openai>=2.8.0"]
 mistral = ["mistralai>=1.9.10"]
 bedrock = ["boto3>=1.40.14"]
 huggingface = ["huggingface-hub[inference]>=0.33.5,<1.0.0"]
-outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
+outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow>11.0.0", "torch>=2.8.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
 outlines-llamacpp = ["outlines[llamacpp]>=1.0.0, <1.3.0"]
 outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; platform_system == 'Darwin' and platform_machine == 'arm64'"]
-outlines-sglang = ["outlines[sglang]>=1.0.0, <1.3.0", "pillow"]
-outlines-vllm-offline = ["vllm; python_version < '3.12' and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "outlines>=1.0.0, <1.3.0"]
+outlines-sglang = ["outlines[sglang]>=1.0.0, <1.3.0", "pillow>11.0.0"]
+outlines-vllm-offline = ["vllm>=0.8.0; python_version < '3.12' and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torch>=2.8.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "outlines>=1.0.0, <1.3.0"]
 # Tools
 duckduckgo = ["ddgs>=9.0.0"]
 tavily = ["tavily-python>=0.5.0"]
diff --git a/pyproject.toml b/pyproject.toml
index 48a77218ce..d6ece26432 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,7 +104,7 @@ dev = [
     "pytest-pretty>=1.3.0",
     "pytest-recording>=0.13.2",
     "diff-cover>=9.2.0",
-    "boto3-stubs[bedrock-runtime]",
+    "boto3-stubs[bedrock-runtime]>=1.40.64",
     "strict-no-cover @ git+https://github.com/pydantic/strict-no-cover.git@7fc59da2c4dff919db2095a0f0e47101b657131d",
     "pytest-xdist>=3.6.1",
     # Needed for PyCharm users
diff --git a/tests/models/anthropic/conftest.py b/tests/models/anthropic/conftest.py
index 6edb7d19f2..2b1600a161 100644
--- a/tests/models/anthropic/conftest.py
+++ b/tests/models/anthropic/conftest.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Callable
 from functools import cache
+from typing import TYPE_CHECKING
 
 import pytest
 
@@ -18,7 +19,8 @@
 from pydantic_ai.models.anthropic import AnthropicModel
 from pydantic_ai.providers.anthropic import AnthropicProvider
 
-AnthropicModelFactory = Callable[..., AnthropicModel]
+if TYPE_CHECKING:
+    AnthropicModelFactory = Callable[..., AnthropicModel]
 
 
 # Model factory fixture for live API tests
@@ -27,10 +29,7 @@ def anthropic_model(anthropic_api_key: str) -> AnthropicModelFactory:
     """Factory to create Anthropic models with custom configuration."""
 
     @cache
-    def _create_model(
-        model_name: str,
-        api_key: str | None = None,
-    ) -> AnthropicModel:
+    def _create_model(model_name: str, api_key: str | None = None) -> AnthropicModel:
         """Create an AnthropicModel with the specified configuration.
 
         Args:
diff --git a/tests/models/anthropic/test_output.py b/tests/models/anthropic/test_output.py
index 714cbdc435..e46f4d1f34 100644
--- a/tests/models/anthropic/test_output.py
+++ b/tests/models/anthropic/test_output.py
@@ -12,7 +12,7 @@
 from __future__ import annotations as _annotations
 
 from collections.abc import Callable
-from typing import Annotated
+from typing import TYPE_CHECKING, Annotated
 
 import httpx
 import pytest
@@ -35,6 +35,9 @@
 
 from ..test_anthropic import completion_message
 
+if TYPE_CHECKING:
+    ANTHROPIC_MODEL_FIXTURE = Callable[..., AnthropicModel]
+
 pytestmark = [
     pytest.mark.skipif(not imports_successful(), reason='anthropic not installed'),
     pytest.mark.anyio,
@@ -231,9 +234,6 @@ async def verify_headers(request: httpx.Request):
 
     return verify_headers
 
-ANTHROPIC_MODEL_FIXTURE = Callable[..., AnthropicModel]
-
-
 # =============================================================================
 # Supported Model Tests (claude-sonnet-4-5)
 # =============================================================================
diff --git a/tests/models/test_bedrock.py b/tests/models/test_bedrock.py
index 9647a0eb7c..d8f3df4d59 100644
--- a/tests/models/test_bedrock.py
+++ b/tests/models/test_bedrock.py
@@ -5,9 +5,7 @@
 from typing import Any
 
 import pytest
-from botocore.exceptions import ClientError
 from inline_snapshot import snapshot
-from mypy_boto3_bedrock_runtime.type_defs import MessageUnionTypeDef, SystemContentBlockTypeDef, ToolTypeDef
 from typing_extensions import TypedDict
 
 from pydantic_ai import (
@@ -49,6 +47,9 @@
 from ..conftest import IsDatetime, IsInstance, IsStr, try_import
 
 with try_import() as imports_successful:
+    from botocore.exceptions import ClientError
+    from mypy_boto3_bedrock_runtime.type_defs import MessageUnionTypeDef, SystemContentBlockTypeDef, ToolTypeDef
+
     from pydantic_ai.models.bedrock import BedrockConverseModel, BedrockModelName, BedrockModelSettings
     from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
     from pydantic_ai.providers.bedrock import BedrockProvider
diff --git a/tests/models/test_google.py b/tests/models/test_google.py
index c82afbff34..46037ae335 100644
--- a/tests/models/test_google.py
+++ b/tests/models/test_google.py
@@ -4640,17 +4640,17 @@ def get_country() -> str:
     'error_class,error_response,expected_status',
     [
         (
-            errors.ServerError,
+            'ServerError',
             {'error': {'code': 503, 'message': 'The service is currently unavailable.', 'status': 'UNAVAILABLE'}},
             503,
         ),
         (
-            errors.ClientError,
+            'ClientError',
             {'error': {'code': 400, 'message': 'Invalid request parameters', 'status': 'INVALID_ARGUMENT'}},
             400,
         ),
         (
-            errors.ClientError,
+            'ClientError',
             {'error': {'code': 429, 'message': 'Rate limit exceeded', 'status': 'RESOURCE_EXHAUSTED'}},
             429,
         ),
@@ -4660,12 +4660,12 @@ async def test_google_api_errors_are_handled(
     allow_model_requests: None,
     google_provider: GoogleProvider,
     mocker: MockerFixture,
-    error_class: type[errors.APIError],
+    error_class: str,
     error_response: dict[str, Any],
     expected_status: int,
 ):
     model = GoogleModel('gemini-1.5-flash', provider=google_provider)
-    mocked_error = error_class(expected_status, error_response)
+    mocked_error = getattr(errors, error_class)(expected_status, error_response)
     mocker.patch.object(model.client.aio.models, 'generate_content', side_effect=mocked_error)
 
     agent = Agent(model=model)
diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py
index ed99de4e56..6d810bc9d9 100644
--- a/tests/models/test_huggingface.py
+++ b/tests/models/test_huggingface.py
@@ -5,28 +5,10 @@
 from dataclasses import asdict, dataclass, field
 from datetime import datetime, timezone
 from functools import cached_property
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 from unittest.mock import Mock
 
-import aiohttp
 import pytest
-from huggingface_hub import (
-    AsyncInferenceClient,
-    ChatCompletionInputMessage,
-    ChatCompletionOutput,
-    ChatCompletionOutputComplete,
-    ChatCompletionOutputFunctionDefinition,
-    ChatCompletionOutputMessage,
-    ChatCompletionOutputToolCall,
-    ChatCompletionOutputUsage,
-    ChatCompletionStreamOutput,
-    ChatCompletionStreamOutputChoice,
-    ChatCompletionStreamOutputDelta,
-    ChatCompletionStreamOutputDeltaToolCall,
-    ChatCompletionStreamOutputFunction,
-    ChatCompletionStreamOutputUsage,
-)
-from huggingface_hub.errors import HfHubHTTPError
 from inline_snapshot import snapshot
 from typing_extensions import TypedDict
 
@@ -50,8 +32,6 @@
     VideoUrl,
 )
 from pydantic_ai.exceptions import ModelHTTPError
-from pydantic_ai.models.huggingface import HuggingFaceModel
-from pydantic_ai.providers.huggingface import HuggingFaceProvider
 from pydantic_ai.result import RunUsage
 from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
 from pydantic_ai.settings import ModelSettings
@@ -62,10 +42,31 @@
 from .mock_async_stream import MockAsyncStream
 
 with try_import() as imports_successful:
-    pass
+    import aiohttp
+    from huggingface_hub import (
+        AsyncInferenceClient,
+        ChatCompletionInputMessage,
+        ChatCompletionOutput,
+        ChatCompletionOutputComplete,
+        ChatCompletionOutputFunctionDefinition,
+        ChatCompletionOutputMessage,
+        ChatCompletionOutputToolCall,
+        ChatCompletionOutputUsage,
+        ChatCompletionStreamOutput,
+        ChatCompletionStreamOutputChoice,
+        ChatCompletionStreamOutputDelta,
+        ChatCompletionStreamOutputDeltaToolCall,
+        ChatCompletionStreamOutputFunction,
+        ChatCompletionStreamOutputUsage,
+    )
+    from huggingface_hub.errors import HfHubHTTPError
+
+    from pydantic_ai.models.huggingface import HuggingFaceModel
+    from pydantic_ai.providers.huggingface import HuggingFaceProvider
 
-MockChatCompletion = ChatCompletionOutput | Exception
-MockStreamEvent = ChatCompletionStreamOutput | Exception
+if TYPE_CHECKING:
+    MockChatCompletion = ChatCompletionOutput | Exception
+    MockStreamEvent = ChatCompletionStreamOutput | Exception
 
 pytestmark = [
     pytest.mark.skipif(not imports_successful(), reason='huggingface_hub not installed'),
@@ -104,9 +105,9 @@ async def chat_completions_create(
         if stream or self.stream:
             assert self.stream is not None, 'you can only use `stream=True` if `stream` is provided'
             if isinstance(self.stream[0], Sequence):
-                response = MockAsyncStream(iter(cast(list[MockStreamEvent], self.stream[self.index])))
+                response = MockAsyncStream(iter(cast(list['MockStreamEvent'], self.stream[self.index])))
             else:
-                response = MockAsyncStream(iter(cast(list[MockStreamEvent], self.stream)))
+                response = MockAsyncStream(iter(cast(list['MockStreamEvent'], self.stream)))
         else:
             assert self.completions is not None, 'you can only use `stream=False` if `completions` are provided'
             if isinstance(self.completions, Sequence):
diff --git a/tests/models/test_model_names.py b/tests/models/test_model_names.py
index a2b3e8130e..f01569e597 100644
--- a/tests/models/test_model_names.py
+++ b/tests/models/test_model_names.py
@@ -51,24 +51,22 @@ def vcr_config():  # pragma: lax no cover
     }
 
 
-_PROVIDER_TO_MODEL_NAMES = {
-    'anthropic': AnthropicModelName,
-    'bedrock': BedrockModelName,
-    'cohere': CohereModelName,
-    'deepseek': DeepSeekModelName,
-    'google-gla': GoogleModelName,
-    'google-vertex': GoogleModelName,
-    'grok': GrokModelName,
-    'groq': GroqModelName,
-    'huggingface': HuggingFaceModelName,
-    'mistral': MistralModelName,
-    'moonshotai': MoonshotAIModelName,
-    'openai': OpenAIModelName,
-}
-
-
 def test_known_model_names():  # pragma: lax no cover
-    # Coverage seems to be misbehaving..?
+    _PROVIDER_TO_MODEL_NAMES = {
+        'anthropic': AnthropicModelName,
+        'bedrock': BedrockModelName,
+        'cohere': CohereModelName,
+        'deepseek': DeepSeekModelName,
+        'google-gla': GoogleModelName,
+        'google-vertex': GoogleModelName,
+        'grok': GrokModelName,
+        'groq': GroqModelName,
+        'huggingface': HuggingFaceModelName,
+        'mistral': MistralModelName,
+        'moonshotai': MoonshotAIModelName,
+        'openai': OpenAIModelName,
+    }
+
     def get_model_names(model_name_type: Any) -> Iterator[str]:
         for arg in get_args(model_name_type):
             if isinstance(arg, str):
diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py
index 18016ccf4c..28b6ca3a68 100644
--- a/tests/models/test_openai_responses.py
+++ b/tests/models/test_openai_responses.py
@@ -45,10 +45,6 @@
     BuiltinToolResultEvent,  # pyright: ignore[reportDeprecated]
 )
 from pydantic_ai.models import ModelRequestParameters
-from pydantic_ai.models.openai import (
-    OpenAIResponsesModelSettings,
-    _resolve_openai_image_generation_size,  # pyright: ignore[reportPrivateUsage]
-)
 from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
 from pydantic_ai.profiles.openai import openai_model_profile
 from pydantic_ai.tools import ToolDefinition
@@ -68,7 +64,11 @@
     from openai.types.responses.response_usage import ResponseUsage
 
     from pydantic_ai.models.anthropic import AnthropicModel, AnthropicModelSettings
-    from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
+    from pydantic_ai.models.openai import (
+        OpenAIResponsesModel,
+        OpenAIResponsesModelSettings,
+        _resolve_openai_image_generation_size,  # pyright: ignore[reportPrivateUsage]
+    )
     from pydantic_ai.providers.anthropic import AnthropicProvider
     from pydantic_ai.providers.openai import OpenAIProvider
 
diff --git a/tests/profiles/test_anthropic.py b/tests/profiles/test_anthropic.py
index 365d201da4..62ed0fa404 100644
--- a/tests/profiles/test_anthropic.py
+++ b/tests/profiles/test_anthropic.py
@@ -22,12 +22,11 @@
 from inline_snapshot import snapshot
 from pydantic import BaseModel, Field
 
-from pydantic_ai.providers.anthropic import AnthropicJsonSchemaTransformer
-
 from ..conftest import try_import
 
 with try_import() as imports_successful:
     from pydantic_ai.profiles.anthropic import anthropic_model_profile
+    from pydantic_ai.providers.anthropic import AnthropicJsonSchemaTransformer
 
 pytestmark = [
     pytest.mark.skipif(not imports_successful(), reason='anthropic not installed'),
diff --git a/tests/providers/test_bedrock.py b/tests/providers/test_bedrock.py
index 784c390204..2461ab5a54 100644
--- a/tests/providers/test_bedrock.py
+++ b/tests/providers/test_bedrock.py
@@ -17,8 +17,14 @@
     from mypy_boto3_bedrock_runtime import BedrockRuntimeClient
 
     from pydantic_ai.models.bedrock import LatestBedrockModelNames
-    from pydantic_ai.providers.bedrock import BEDROCK_GEO_PREFIXES, BedrockModelProfile, BedrockProvider
+    from pydantic_ai.providers.bedrock import BedrockModelProfile, BedrockProvider
 
+if not imports_successful():
+    bedrock_geo_prefixes = ()
+else:
+    from pydantic_ai.providers.bedrock import BEDROCK_GEO_PREFIXES
+
+    bedrock_geo_prefixes = BEDROCK_GEO_PREFIXES
 
 pytestmark = pytest.mark.skipif(not imports_successful(), reason='bedrock not installed')
 
@@ -108,7 +114,7 @@ def test_bedrock_provider_model_profile(env: TestEnv, mocker: MockerFixture):
     assert unknown_model is None
 
 
-@pytest.mark.parametrize('prefix', BEDROCK_GEO_PREFIXES)
+@pytest.mark.parametrize('prefix', bedrock_geo_prefixes)
 def test_bedrock_provider_model_profile_all_geo_prefixes(env: TestEnv, prefix: str):
     """Test that all cross-region inference geo prefixes are correctly handled."""
     env.set('AWS_DEFAULT_REGION', 'us-east-1')
@@ -146,7 +152,7 @@ def test_latest_bedrock_model_names_geo_prefixes_are_supported():
         parts = model_name.split('.')
         if len(parts) >= 3:
             geo_prefix = parts[0]
-            if geo_prefix not in BEDROCK_GEO_PREFIXES:  # pragma: no cover
+            if geo_prefix not in bedrock_geo_prefixes:  # pragma: no cover
                 missing_prefixes.add(geo_prefix)
 
     if missing_prefixes:  # pragma: no cover
diff --git a/tests/test_ui_web.py b/tests/test_ui_web.py
index ab9fa1066e..388d4c2565 100644
--- a/tests/test_ui_web.py
+++ b/tests/test_ui_web.py
@@ -12,7 +12,9 @@
 
 from .conftest import try_import
 
-with try_import() as starlette_import_successful:
+with try_import() as imports_successful:
+    # Used in a test in this file.
+    import openai  # pyright: ignore[reportUnusedImport] # noqa: F401
     from starlette.applications import Starlette
     from starlette.testclient import TestClient
 
@@ -20,9 +22,7 @@
 
 from pydantic_ai.ui._web import create_web_app
 
-pytestmark = [
-    pytest.mark.skipif(not starlette_import_successful(), reason='starlette not installed'),
-]
+pytestmark = [pytest.mark.skipif(not imports_successful(), reason='starlette not installed')]
 
 
 def test_agent_to_web():