Add support for ImageGenerationTool.size to Gemini image models (#3720)

h0rv · web-flow · commit c1213e4a7a97 · 2025-12-17T15:04:07.000Z
diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md
@@ -353,6 +353,23 @@ assert isinstance(result.output, BinaryImage)
 
 _(This example is complete, it can be run "as is")_
 
+To control the image resolution with Google image generation models (starting with Gemini 3 Pro Image), use the `size` parameter:
+
+```py {title="image_generation_google_resolution.py"}
+from pydantic_ai import Agent, BinaryImage, ImageGenerationTool
+
+agent = Agent(
+    'google-gla:gemini-3-pro-image-preview',
+    builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')],
+    output_type=BinaryImage,
+)
+
+result = agent.run_sync('Generate a high-resolution wide landscape illustration of an axolotl.')
+assert isinstance(result.output, BinaryImage)
+```
+
+_(This example is complete, it can be run "as is")_
+
 For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].
 
 #### Provider Support
@@ -366,8 +383,10 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
 | `output_format` | ✅ | ❌ |
 | `partial_images` | ✅ | ❌ |
 | `quality` | ✅ | ❌ |
-| `size` | ✅ | ❌ |
-| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ |
+| `size` | ✅ (auto (default), 1024x1024, 1024x1536, 1536x1024) | ✅ (1K (default), 2K, 4K) |
+| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ (1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9) |
+
+**Note:** For OpenAI, `size='auto'` (the default) lets the model choose the image size based on the prompt.
 
 ## Web Fetch Tool
 
diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py
@@ -326,12 +326,11 @@ class ImageGenerationTool(AbstractBuiltinTool):
     * OpenAI Responses
     """
 
-    size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'] = 'auto'
+    size: Literal['auto', '1024x1024', '1024x1536', '1536x1024', '1K', '2K', '4K'] | None = None
     """The size of the generated image.
 
-    Supported by:
-
-    * OpenAI Responses
+    * OpenAI Responses: 'auto' (default: model selects the size based on the prompt), '1024x1024', '1024x1536', '1536x1024'
+    * Google (Gemini 3 Pro Image and later): '1K' (default), '2K', '4K'
     """
 
     aspect_ratio: ImageAspectRatio | None = None
diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py
@@ -137,6 +137,9 @@
     GoogleFinishReason.NO_IMAGE: 'error',
 }
 
+_GOOGLE_IMAGE_SIZE = Literal['1K', '2K', '4K']
+_GOOGLE_IMAGE_SIZES: tuple[_GOOGLE_IMAGE_SIZE, ...] = _utils.get_args(_GOOGLE_IMAGE_SIZE)
+
 
 class GoogleModelSettings(ModelSettings, total=False):
     """Settings used for a Gemini model request."""
@@ -367,8 +370,17 @@ def _get_tools(
                         raise UserError(
                             "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
                         )
-                    if tool.aspect_ratio:
-                        image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)
+
+                    image_config = ImageConfigDict()
+                    if tool.aspect_ratio is not None:
+                        image_config['aspect_ratio'] = tool.aspect_ratio
+                    if tool.size is not None:
+                        if tool.size not in _GOOGLE_IMAGE_SIZES:
+                            raise UserError(
+                                f'Google image generation only supports `size` values: {_GOOGLE_IMAGE_SIZES}. '
+                                f'Got: {tool.size!r}. Omit `size` to use the default (1K).'
+                            )
+                        image_config['image_size'] = tool.size
                 else:  # pragma: no cover
                     raise UserError(
                         f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'
diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -159,24 +159,33 @@
     '3:2': '1536x1024',
 }
 
+_OPENAI_IMAGE_SIZE = Literal['auto', '1024x1024', '1024x1536', '1536x1024']
+_OPENAI_IMAGE_SIZES: tuple[_OPENAI_IMAGE_SIZE, ...] = _utils.get_args(_OPENAI_IMAGE_SIZE)
+
 
 def _resolve_openai_image_generation_size(
     tool: ImageGenerationTool,
-) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']:
+) -> _OPENAI_IMAGE_SIZE:
     """Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided."""
     aspect_ratio = tool.aspect_ratio
     if aspect_ratio is None:
+        if tool.size is None:
+            return 'auto'  # default
+        if tool.size not in _OPENAI_IMAGE_SIZES:
+            raise UserError(
+                f'OpenAI image generation only supports `size` values: {_OPENAI_IMAGE_SIZES}. '
+                f'Got: {tool.size}. Omit `size` to use the default (auto).'
+            )
         return tool.size
 
     mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio)
     if mapped_size is None:
         supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE)
         raise UserError(
-            f'OpenAI image generation only supports `aspect_ratio` values: {supported}. '
-            'Specify one of those values or omit `aspect_ratio`.'
+            f'OpenAI image generation only supports `aspect_ratio` values: {supported}. Specify one of those values or omit `aspect_ratio`.'
         )
-
-    if tool.size not in ('auto', mapped_size):
+    # When aspect_ratio is set, size must be None, 'auto', or match the mapped size
+    if tool.size not in (None, 'auto', mapped_size):
         raise UserError(
             '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.'
         )
diff --git a/tests/models/test_google.py b/tests/models/test_google.py
@@ -3388,7 +3388,7 @@ async def test_google_image_generation_with_text(allow_model_requests: None, goo
 async def test_google_image_or_text_output(allow_model_requests: None, google_provider: GoogleProvider):
     m = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
     # ImageGenerationTool is listed here to indicate just that it doesn't cause any issues, even though it's not necessary with an image model.
-    agent = Agent(m, output_type=str | BinaryImage, builtin_tools=[ImageGenerationTool()])
+    agent = Agent(m, output_type=str | BinaryImage, builtin_tools=[ImageGenerationTool(size='1K')])
 
     result = await agent.run('Tell me a two-sentence story about an axolotl, no image please.')
     assert result.output == snapshot(
@@ -3653,6 +3653,44 @@ async def test_google_image_generation_tool_aspect_ratio(google_provider: Google
     assert image_config == {'aspect_ratio': '16:9'}
 
 
+async def test_google_image_generation_resolution(google_provider: GoogleProvider) -> None:
+    """Test that the `size` parameter from ImageGenerationTool is added to image_config as `image_size`."""
+    model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
+    params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='2K')])
+
+    tools, image_config = model._get_tools(params)  # pyright: ignore[reportPrivateUsage]
+    assert tools is None
+    assert image_config == {'image_size': '2K'}
+
+
+async def test_google_image_generation_resolution_with_aspect_ratio(google_provider: GoogleProvider) -> None:
+    """Test that `size` and `aspect_ratio` from ImageGenerationTool are both added to image_config."""
+    model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
+    params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')])
+
+    tools, image_config = model._get_tools(params)  # pyright: ignore[reportPrivateUsage]
+    assert tools is None
+    assert image_config == {'aspect_ratio': '16:9', 'image_size': '4K'}
+
+
+async def test_google_image_generation_unsupported_size_raises_error(google_provider: GoogleProvider) -> None:
+    """Test that unsupported size values raise an error."""
+    model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
+    params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='1024x1024')])
+
+    with pytest.raises(UserError, match='Google image generation only supports `size` values'):
+        model._get_tools(params)  # pyright: ignore[reportPrivateUsage]
+
+
+async def test_google_image_generation_auto_size_raises_error(google_provider: GoogleProvider) -> None:
+    """Test that 'auto' size raises an error for Google since it doesn't support intelligent size selection."""
+    model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
+    params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='auto')])
+
+    with pytest.raises(UserError, match='Google image generation only supports `size` values'):
+        model._get_tools(params)  # pyright: ignore[reportPrivateUsage]
+
+
 async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
     model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)
 
diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py
@@ -37,7 +37,7 @@
     UserPromptPart,
 )
 from pydantic_ai._json_schema import InlineDefsJsonSchemaTransformer
-from pydantic_ai.builtin_tools import WebSearchTool
+from pydantic_ai.builtin_tools import ImageGenerationTool, WebSearchTool
 from pydantic_ai.models import ModelRequestParameters
 from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
 from pydantic_ai.profiles.openai import OpenAIModelProfile, openai_model_profile
@@ -79,6 +79,7 @@
         OpenAIResponsesModel,
         OpenAIResponsesModelSettings,
         OpenAISystemPromptRole,
+        _resolve_openai_image_generation_size,  # pyright: ignore[reportPrivateUsage]
     )
     from pydantic_ai.profiles.openai import OpenAIJsonSchemaTransformer
     from pydantic_ai.providers.cerebras import CerebrasProvider
@@ -103,6 +104,43 @@ def test_init():
     assert m.model_name == 'gpt-4o'
 
 
+@pytest.mark.parametrize(
+    'aspect_ratio,size,expected',
+    [
+        # aspect_ratio is None, various sizes
+        (None, None, 'auto'),
+        (None, 'auto', 'auto'),
+        (None, '1024x1024', '1024x1024'),
+        (None, '1024x1536', '1024x1536'),
+        (None, '1536x1024', '1536x1024'),
+        # Valid aspect_ratios with no size
+        ('1:1', None, '1024x1024'),
+        ('2:3', None, '1024x1536'),
+        ('3:2', None, '1536x1024'),
+        # Valid aspect_ratios with compatible sizes
+        ('1:1', 'auto', '1024x1024'),
+        ('1:1', '1024x1024', '1024x1024'),
+        ('2:3', '1024x1536', '1024x1536'),
+        ('3:2', '1536x1024', '1536x1024'),
+    ],
+)
+def test_openai_image_generation_size_valid_combinations(
+    aspect_ratio: Literal['1:1', '2:3', '3:2'] | None,
+    size: Literal['auto', '1024x1024', '1024x1536', '1536x1024'] | None,
+    expected: Literal['auto', '1024x1024', '1024x1536', '1536x1024'],
+) -> None:
+    """Test valid combinations of aspect_ratio and size for OpenAI image generation."""
+    tool = ImageGenerationTool(aspect_ratio=aspect_ratio, size=size)
+    assert _resolve_openai_image_generation_size(tool) == expected
+
+
+def test_openai_image_generation_tool_aspect_ratio_invalid() -> None:
+    """Test that invalid aspect_ratio raises UserError."""
+    tool = ImageGenerationTool(aspect_ratio='16:9')
+    with pytest.raises(UserError, match='OpenAI image generation only supports `aspect_ratio` values'):
+        _resolve_openai_image_generation_size(tool)
+
+
 async def test_request_simple_success(allow_model_requests: None):
     c = completion_message(
         ChatCompletionMessage(content='world', role='assistant'),
diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py
@@ -163,6 +163,12 @@ def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size
         _resolve_openai_image_generation_size(tool)
 
 
+def test_openai_responses_image_generation_tool_unsupported_size_raises_error() -> None:
+    tool = ImageGenerationTool(size='2K')
+    with pytest.raises(UserError, match='OpenAI image generation only supports `size` values'):
+        _resolve_openai_image_generation_size(tool)
+
+
 async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str):
     model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
 
diff --git a/tests/test_examples.py b/tests/test_examples.py
@@ -698,6 +698,12 @@ async def model_logic(  # noqa: C901
                     FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),
                 ]
             )
+        elif m.content == 'Generate a high-resolution wide landscape illustration of an axolotl.':
+            return ModelResponse(
+                parts=[
+                    FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='high-res-axolotl')),
+                ]
+            )
         elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':
             return ModelResponse(
                 parts=[

Original file line number	Diff line number	Diff line change
`@@ -698,6 +698,12 @@ async def model_logic( # noqa: C901`
`698`	`698`	`FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),`
`699`	`699`	`]`
`700`	`700`	`)`
	`701`	`+ elif m.content == 'Generate a high-resolution wide landscape illustration of an axolotl.':`
	`702`	`+ return ModelResponse(`
	`703`	`+ parts=[`
	`704`	`+ FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='high-res-axolotl')),`
	`705`	`+ ]`
	`706`	`+ )`
`701`	`707`	`elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':`
`702`	`708`	`return ModelResponse(`
`703`	`709`	`parts=[`