Skip to content

Commit c1213e4

Browse files
authored
Add support for ImageGenerationTool.size to Gemini image models (#3720)
1 parent 7d187d5 commit c1213e4

File tree

8 files changed

+142
-15
lines changed

8 files changed

+142
-15
lines changed

docs/builtin-tools.md

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,23 @@ assert isinstance(result.output, BinaryImage)
353353

354354
_(This example is complete, it can be run "as is")_
355355

356+
To control the image resolution with Google image generation models (starting with Gemini 3 Pro Image), use the `size` parameter:
357+
358+
```py {title="image_generation_google_resolution.py"}
359+
from pydantic_ai import Agent, BinaryImage, ImageGenerationTool
360+
361+
agent = Agent(
362+
'google-gla:gemini-3-pro-image-preview',
363+
builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')],
364+
output_type=BinaryImage,
365+
)
366+
367+
result = agent.run_sync('Generate a high-resolution wide landscape illustration of an axolotl.')
368+
assert isinstance(result.output, BinaryImage)
369+
```
370+
371+
_(This example is complete, it can be run "as is")_
372+
356373
For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].
357374

358375
#### Provider Support
@@ -366,8 +383,10 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
366383
| `output_format` |||
367384
| `partial_images` |||
368385
| `quality` |||
369-
| `size` |||
370-
| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) ||
386+
| `size` | ✅ (auto (default), 1024x1024, 1024x1536, 1536x1024) | ✅ (1K (default), 2K, 4K) |
387+
| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ (1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9) |
388+
389+
**Note:** For OpenAI, `size='auto'` lets the model select the image size based on the prompt.
371390

372391
## Web Fetch Tool
373392

pydantic_ai_slim/pydantic_ai/builtin_tools.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -326,12 +326,11 @@ class ImageGenerationTool(AbstractBuiltinTool):
326326
* OpenAI Responses
327327
"""
328328

329-
size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'] = 'auto'
329+
size: Literal['auto', '1024x1024', '1024x1536', '1536x1024', '1K', '2K', '4K'] | None = None
330330
"""The size of the generated image.
331331
332-
Supported by:
333-
334-
* OpenAI Responses
332+
* OpenAI Responses: 'auto' (default: model selects the size based on the prompt), '1024x1024', '1024x1536', '1536x1024'
333+
* Google (Gemini 3 Pro Image and later): '1K' (default), '2K', '4K'
335334
"""
336335

337336
aspect_ratio: ImageAspectRatio | None = None

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@
137137
GoogleFinishReason.NO_IMAGE: 'error',
138138
}
139139

140+
_GOOGLE_IMAGE_SIZE = Literal['1K', '2K', '4K']
141+
_GOOGLE_IMAGE_SIZES: tuple[_GOOGLE_IMAGE_SIZE, ...] = _utils.get_args(_GOOGLE_IMAGE_SIZE)
142+
140143

141144
class GoogleModelSettings(ModelSettings, total=False):
142145
"""Settings used for a Gemini model request."""
@@ -367,8 +370,17 @@ def _get_tools(
367370
raise UserError(
368371
"`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
369372
)
370-
if tool.aspect_ratio:
371-
image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)
373+
374+
image_config = ImageConfigDict()
375+
if tool.aspect_ratio is not None:
376+
image_config['aspect_ratio'] = tool.aspect_ratio
377+
if tool.size is not None:
378+
if tool.size not in _GOOGLE_IMAGE_SIZES:
379+
raise UserError(
380+
f'Google image generation only supports `size` values: {_GOOGLE_IMAGE_SIZES}. '
381+
f'Got: {tool.size!r}. Omit `size` to use the default (1K).'
382+
)
383+
image_config['image_size'] = tool.size
372384
else: # pragma: no cover
373385
raise UserError(
374386
f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,24 +159,33 @@
159159
'3:2': '1536x1024',
160160
}
161161

162+
_OPENAI_IMAGE_SIZE = Literal['auto', '1024x1024', '1024x1536', '1536x1024']
163+
_OPENAI_IMAGE_SIZES: tuple[_OPENAI_IMAGE_SIZE, ...] = _utils.get_args(_OPENAI_IMAGE_SIZE)
164+
162165

163166
def _resolve_openai_image_generation_size(
164167
tool: ImageGenerationTool,
165-
) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']:
168+
) -> _OPENAI_IMAGE_SIZE:
166169
"""Resolve the OpenAI size string from `ImageGenerationTool.size` and `aspect_ratio`, raising `UserError` on unsupported or conflicting values."""
167170
aspect_ratio = tool.aspect_ratio
168171
if aspect_ratio is None:
172+
if tool.size is None:
173+
return 'auto' # default
174+
if tool.size not in _OPENAI_IMAGE_SIZES:
175+
raise UserError(
176+
f'OpenAI image generation only supports `size` values: {_OPENAI_IMAGE_SIZES}. '
177+
f'Got: {tool.size}. Omit `size` to use the default (auto).'
178+
)
169179
return tool.size
170180

171181
mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio)
172182
if mapped_size is None:
173183
supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE)
174184
raise UserError(
175-
f'OpenAI image generation only supports `aspect_ratio` values: {supported}. '
176-
'Specify one of those values or omit `aspect_ratio`.'
185+
f'OpenAI image generation only supports `aspect_ratio` values: {supported}. Specify one of those values or omit `aspect_ratio`.'
177186
)
178-
179-
if tool.size not in ('auto', mapped_size):
187+
# When aspect_ratio is set, size must be None, 'auto', or match the mapped size
188+
if tool.size not in (None, 'auto', mapped_size):
180189
raise UserError(
181190
'`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.'
182191
)

tests/models/test_google.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3388,7 +3388,7 @@ async def test_google_image_generation_with_text(allow_model_requests: None, goo
33883388
async def test_google_image_or_text_output(allow_model_requests: None, google_provider: GoogleProvider):
33893389
m = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
33903390
# ImageGenerationTool is listed here just to indicate that it doesn't cause any issues, even though it's not necessary with an image model.
3391-
agent = Agent(m, output_type=str | BinaryImage, builtin_tools=[ImageGenerationTool()])
3391+
agent = Agent(m, output_type=str | BinaryImage, builtin_tools=[ImageGenerationTool(size='1K')])
33923392

33933393
result = await agent.run('Tell me a two-sentence story about an axolotl, no image please.')
33943394
assert result.output == snapshot(
@@ -3653,6 +3653,44 @@ async def test_google_image_generation_tool_aspect_ratio(google_provider: Google
36533653
assert image_config == {'aspect_ratio': '16:9'}
36543654

36553655

3656+
async def test_google_image_generation_resolution(google_provider: GoogleProvider) -> None:
3657+
"""Test that the `size` parameter from ImageGenerationTool is added to image_config as `image_size`."""
3658+
model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
3659+
params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='2K')])
3660+
3661+
tools, image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage]
3662+
assert tools is None
3663+
assert image_config == {'image_size': '2K'}
3664+
3665+
3666+
async def test_google_image_generation_resolution_with_aspect_ratio(google_provider: GoogleProvider) -> None:
3667+
"""Test that `size` and `aspect_ratio` from ImageGenerationTool work together in image_config."""
3668+
model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
3669+
params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')])
3670+
3671+
tools, image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage]
3672+
assert tools is None
3673+
assert image_config == {'aspect_ratio': '16:9', 'image_size': '4K'}
3674+
3675+
3676+
async def test_google_image_generation_unsupported_size_raises_error(google_provider: GoogleProvider) -> None:
3677+
"""Test that unsupported size values raise an error."""
3678+
model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
3679+
params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='1024x1024')])
3680+
3681+
with pytest.raises(UserError, match='Google image generation only supports `size` values'):
3682+
model._get_tools(params) # pyright: ignore[reportPrivateUsage]
3683+
3684+
3685+
async def test_google_image_generation_auto_size_raises_error(google_provider: GoogleProvider) -> None:
3686+
"""Test that 'auto' size raises an error for Google since it doesn't support intelligent size selection."""
3687+
model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
3688+
params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='auto')])
3689+
3690+
with pytest.raises(UserError, match='Google image generation only supports `size` values'):
3691+
model._get_tools(params) # pyright: ignore[reportPrivateUsage]
3692+
3693+
36563694
async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
36573695
model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)
36583696

tests/models/test_openai.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
UserPromptPart,
3838
)
3939
from pydantic_ai._json_schema import InlineDefsJsonSchemaTransformer
40-
from pydantic_ai.builtin_tools import WebSearchTool
40+
from pydantic_ai.builtin_tools import ImageGenerationTool, WebSearchTool
4141
from pydantic_ai.models import ModelRequestParameters
4242
from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
4343
from pydantic_ai.profiles.openai import OpenAIModelProfile, openai_model_profile
@@ -79,6 +79,7 @@
7979
OpenAIResponsesModel,
8080
OpenAIResponsesModelSettings,
8181
OpenAISystemPromptRole,
82+
_resolve_openai_image_generation_size, # pyright: ignore[reportPrivateUsage]
8283
)
8384
from pydantic_ai.profiles.openai import OpenAIJsonSchemaTransformer
8485
from pydantic_ai.providers.cerebras import CerebrasProvider
@@ -103,6 +104,43 @@ def test_init():
103104
assert m.model_name == 'gpt-4o'
104105

105106

107+
@pytest.mark.parametrize(
108+
'aspect_ratio,size,expected',
109+
[
110+
# aspect_ratio is None, various sizes
111+
(None, None, 'auto'),
112+
(None, 'auto', 'auto'),
113+
(None, '1024x1024', '1024x1024'),
114+
(None, '1024x1536', '1024x1536'),
115+
(None, '1536x1024', '1536x1024'),
116+
# Valid aspect_ratios with no size
117+
('1:1', None, '1024x1024'),
118+
('2:3', None, '1024x1536'),
119+
('3:2', None, '1536x1024'),
120+
# Valid aspect_ratios with compatible sizes
121+
('1:1', 'auto', '1024x1024'),
122+
('1:1', '1024x1024', '1024x1024'),
123+
('2:3', '1024x1536', '1024x1536'),
124+
('3:2', '1536x1024', '1536x1024'),
125+
],
126+
)
127+
def test_openai_image_generation_size_valid_combinations(
128+
aspect_ratio: Literal['1:1', '2:3', '3:2'] | None,
129+
size: Literal['auto', '1024x1024', '1024x1536', '1536x1024'] | None,
130+
expected: Literal['auto', '1024x1024', '1024x1536', '1536x1024'],
131+
) -> None:
132+
"""Test valid combinations of aspect_ratio and size for OpenAI image generation."""
133+
tool = ImageGenerationTool(aspect_ratio=aspect_ratio, size=size)
134+
assert _resolve_openai_image_generation_size(tool) == expected
135+
136+
137+
def test_openai_image_generation_tool_aspect_ratio_invalid() -> None:
138+
"""Test that invalid aspect_ratio raises UserError."""
139+
tool = ImageGenerationTool(aspect_ratio='16:9')
140+
with pytest.raises(UserError, match='OpenAI image generation only supports `aspect_ratio` values'):
141+
_resolve_openai_image_generation_size(tool)
142+
143+
106144
async def test_request_simple_success(allow_model_requests: None):
107145
c = completion_message(
108146
ChatCompletionMessage(content='world', role='assistant'),

tests/models/test_openai_responses.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,12 @@ def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size
163163
_resolve_openai_image_generation_size(tool)
164164

165165

166+
def test_openai_responses_image_generation_tool_unsupported_size_raises_error() -> None:
167+
tool = ImageGenerationTool(size='2K')
168+
with pytest.raises(UserError, match='OpenAI image generation only supports `size` values'):
169+
_resolve_openai_image_generation_size(tool)
170+
171+
166172
async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str):
167173
model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
168174

tests/test_examples.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,12 @@ async def model_logic( # noqa: C901
698698
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),
699699
]
700700
)
701+
elif m.content == 'Generate a high-resolution wide landscape illustration of an axolotl.':
702+
return ModelResponse(
703+
parts=[
704+
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='high-res-axolotl')),
705+
]
706+
)
701707
elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':
702708
return ModelResponse(
703709
parts=[

0 commit comments

Comments
 (0)