Skip to content

Commit ad5ba4f

Browse files
authored
Allow TemporalAgent to switch model at agent.run-time (#3537)
1 parent e6b7f1f commit ad5ba4f

File tree

10 files changed

+695
-81
lines changed

10 files changed

+695
-81
lines changed

docs/durable_execution/temporal.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,86 @@ As the streaming model request activity, workflow, and workflow execution call a
184184
- To get data from the workflow call site or workflow to the event stream handler, you can use a [dependencies object](#agent-run-context-and-dependencies).
185185
- To get data from the event stream handler to the workflow, workflow call site, or a frontend, you need to use an external system that the event stream handler can write to and the event consumer can read from, like a message queue. You can use the dependency object to make sure the same connection string or other unique ID is available in all the places that need it.
186186

187+
### Model Selection at Runtime
188+
189+
[`Agent.run(model=...)`][pydantic_ai.agent.Agent.run] normally supports both model strings (like `'openai:gpt-5.2'`) and model instances. However, `TemporalAgent` does not support arbitrary model instances because they cannot be serialized for Temporal's replay mechanism.
190+
191+
To use model instances with `TemporalAgent`, you need to pre-register them by passing a dict of model instances to `TemporalAgent(models={...})`. You can then reference them by name or by passing the registered instance directly. If the wrapped agent doesn't have a model set, the first registered model will be used as the default.
192+
193+
Model strings work as expected. For scenarios where you need to customize the provider used by the model string (e.g., inject API keys from deps), you can pass a `provider_factory` to `TemporalAgent`; it is called with the [`RunContext`][pydantic_ai.tools.RunContext] and the provider name.
194+
195+
Here's an example showing how to pre-register and use multiple models:
196+
197+
```python {title="multi_model_temporal.py" test="skip"}
from dataclasses import dataclass
from typing import Any

from temporalio import workflow

from pydantic_ai import Agent
from pydantic_ai.durable_exec.temporal import TemporalAgent
from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.google import GoogleModel
from pydantic_ai.models.openai import OpenAIResponsesModel
from pydantic_ai.providers import Provider
from pydantic_ai.tools import RunContext


@dataclass
class Deps:
    openai_api_key: str | None = None
    anthropic_api_key: str | None = None


# Create models from different providers
default_model = OpenAIResponsesModel('gpt-5.2')
fast_model = AnthropicModel('claude-sonnet-4-5')
reasoning_model = GoogleModel('gemini-2.5-pro')


# Optional: provider factory for dynamic model configuration
def my_provider_factory(run_context: RunContext[Deps], provider_name: str) -> Provider[Any]:
    """Create providers with custom configuration based on run context."""
    if provider_name == 'openai':
        from pydantic_ai.providers.openai import OpenAIProvider

        return OpenAIProvider(api_key=run_context.deps.openai_api_key)
    elif provider_name == 'anthropic':
        from pydantic_ai.providers.anthropic import AnthropicProvider

        return AnthropicProvider(api_key=run_context.deps.anthropic_api_key)
    else:
        raise ValueError(f'Unknown provider: {provider_name}')


agent = Agent(default_model, name='multi_model_agent', deps_type=Deps)

temporal_agent = TemporalAgent(
    agent,
    models={
        'fast': fast_model,
        'reasoning': reasoning_model,
    },
    provider_factory=my_provider_factory,  # Optional
)


@workflow.defn
class MultiModelWorkflow:
    @workflow.run
    async def run(self, prompt: str, use_reasoning: bool, use_fast: bool) -> str:
        if use_reasoning:
            # Select by registered name
            result = await temporal_agent.run(prompt, model='reasoning')
        elif use_fast:
            # Or pass the registered instance directly
            result = await temporal_agent.run(prompt, model=fast_model)
        else:
            # Or pass a model string (uses provider_factory if set)
            result = await temporal_agent.run(prompt, model='openai:gpt-4.1-mini')
        return result.output
```
266+
187267
## Activity Configuration
188268

189269
Temporal activity configuration, like timeouts and retry policies, can be customized by passing [`temporalio.workflow.ActivityConfig`](https://python.temporal.io/temporalio.workflow.ActivityConfig.html) objects to the `TemporalAgent` constructor:

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from pydantic_graph.nodes import End, NodeRunEndT
2727

2828
from . import _output, _system_prompt, exceptions, messages as _messages, models, result, usage as _usage
29+
from ._run_context import set_current_run_context
2930
from .exceptions import ToolRetryError
3031
from .output import OutputDataT, OutputSpec
3132
from .settings import ModelSettings
@@ -447,25 +448,26 @@ async def stream(
447448
assert not self._did_stream, 'stream() should only be called once per node'
448449

449450
model_settings, model_request_parameters, message_history, run_context = await self._prepare_request(ctx)
450-
async with ctx.deps.model.request_stream(
451-
message_history, model_settings, model_request_parameters, run_context
452-
) as streamed_response:
453-
self._did_stream = True
454-
ctx.state.usage.requests += 1
455-
agent_stream = result.AgentStream[DepsT, T](
456-
_raw_stream_response=streamed_response,
457-
_output_schema=ctx.deps.output_schema,
458-
_model_request_parameters=model_request_parameters,
459-
_output_validators=ctx.deps.output_validators,
460-
_run_ctx=build_run_context(ctx),
461-
_usage_limits=ctx.deps.usage_limits,
462-
_tool_manager=ctx.deps.tool_manager,
463-
)
464-
yield agent_stream
465-
# In case the user didn't manually consume the full stream, ensure it is fully consumed here,
466-
# otherwise usage won't be properly counted:
467-
async for _ in agent_stream:
468-
pass
451+
with set_current_run_context(run_context):
452+
async with ctx.deps.model.request_stream(
453+
message_history, model_settings, model_request_parameters, run_context
454+
) as streamed_response:
455+
self._did_stream = True
456+
ctx.state.usage.requests += 1
457+
agent_stream = result.AgentStream[DepsT, T](
458+
_raw_stream_response=streamed_response,
459+
_output_schema=ctx.deps.output_schema,
460+
_model_request_parameters=model_request_parameters,
461+
_output_validators=ctx.deps.output_validators,
462+
_run_ctx=build_run_context(ctx),
463+
_usage_limits=ctx.deps.usage_limits,
464+
_tool_manager=ctx.deps.tool_manager,
465+
)
466+
yield agent_stream
467+
# In case the user didn't manually consume the full stream, ensure it is fully consumed here,
468+
# otherwise usage won't be properly counted:
469+
async for _ in agent_stream:
470+
pass
469471

470472
model_response = streamed_response.get()
471473

@@ -478,8 +480,9 @@ async def _make_request(
478480
if self._result is not None:
479481
return self._result # pragma: no cover
480482

481-
model_settings, model_request_parameters, message_history, _ = await self._prepare_request(ctx)
482-
model_response = await ctx.deps.model.request(message_history, model_settings, model_request_parameters)
483+
model_settings, model_request_parameters, message_history, run_context = await self._prepare_request(ctx)
484+
with set_current_run_context(run_context):
485+
model_response = await ctx.deps.model.request(message_history, model_settings, model_request_parameters)
483486
ctx.state.usage.requests += 1
484487

485488
return self._finish_handling(ctx, model_response)

pydantic_ai_slim/pydantic_ai/_run_context.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from __future__ import annotations as _annotations
22

33
import dataclasses
4-
from collections.abc import Sequence
4+
from collections.abc import Iterator, Sequence
5+
from contextlib import contextmanager
6+
from contextvars import ContextVar
57
from dataclasses import field
68
from typing import TYPE_CHECKING, Any, Generic
79

@@ -71,3 +73,36 @@ def last_attempt(self) -> bool:
7173
return self.retry == self.max_retries
7274

7375
__repr__ = _utils.dataclasses_no_defaults_repr
76+
77+
78+
_CURRENT_RUN_CONTEXT: ContextVar[RunContext[Any] | None] = ContextVar(
    'pydantic_ai.current_run_context',
    default=None,
)
"""Context variable storing the current [`RunContext`][pydantic_ai.tools.RunContext]."""


def get_current_run_context() -> RunContext[Any] | None:
    """Get the current run context, if one is set.

    Returns:
        The current [`RunContext`][pydantic_ai.tools.RunContext], or `None` if not in an agent run.
    """
    return _CURRENT_RUN_CONTEXT.get()


@contextmanager
def set_current_run_context(run_context: RunContext[Any]) -> Iterator[None]:
    """Context manager to set the current run context.

    Args:
        run_context: The run context to set as current.

    Yields:
        None
    """
    token = _CURRENT_RUN_CONTEXT.set(run_context)
    try:
        yield
    finally:
        _CURRENT_RUN_CONTEXT.reset(token)

0 commit comments

Comments
 (0)