Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion dspy/clients/lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,9 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
if isinstance(c, str):
content_blocks.append({"type": "input_text", "text": c})
elif isinstance(c, list):
content_blocks.extend(c)
# Convert each content item from Chat API format to Responses API format
for item in c:
content_blocks.append(_convert_content_item_to_responses_format(item))
request["input"] = [{"role": msg.get("role", "user"), "content": content_blocks}]

# Convert `response_format` to `text.format` for Responses API
Expand All @@ -480,6 +482,38 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
return request


def _convert_content_item_to_responses_format(item: dict[str, Any]) -> dict[str, Any]:
"""
Convert a content item from Chat API format to Responses API format.

For images, converts from:
{"type": "image_url", "image_url": {"url": "..."}}
To:
{"type": "input_image", "image_url": "..."}

For text, converts from:
{"type": "text", "text": "..."}
To:
{"type": "input_text", "text": "..."}

For other types, passes through as-is.
"""
if item.get("type") == "image_url":
image_url = item.get("image_url", {}).get("url", "")
return {
"type": "input_image",
"image_url": image_url,
}
elif item.get("type") == "text":
return {
"type": "input_text",
"text": item.get("text", ""),
}

# For other items, return as-is
return item


def _get_headers(headers: dict[str, Any] | None = None):
headers = headers or {}
return {
Expand Down
128 changes: 126 additions & 2 deletions tests/clients/test_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,8 @@ def test_reasoning_model_requirements(model_name):
lm = dspy.LM(
model=model_name,
)
assert lm.kwargs["temperature"] == None
assert lm.kwargs["max_completion_tokens"] == None
assert lm.kwargs["temperature"] is None
assert lm.kwargs["max_completion_tokens"] is None


def test_dump_state():
Expand Down Expand Up @@ -633,3 +633,127 @@ def test_api_key_not_saved_in_json():
assert saved_state["lm"]["model"] == "openai/gpt-4o-mini"
assert saved_state["lm"]["temperature"] == 1.0
assert saved_state["lm"]["max_tokens"] == 100


def test_responses_api_converts_images_correctly():
    """Check that Chat API image/text parts are rewritten into Responses API parts.

    Covers two inputs: a message mixing text with a base64 data-URI image, and a
    message holding only an image referenced by an https URL.
    """
    from dspy.clients.lm import _convert_chat_request_to_responses_request

    # Use a non-empty data URI: an empty string would also equal the converter's
    # fallback for a missing URL, so the assertion could not detect a broken
    # extraction. The payload is a placeholder and is never decoded here.
    base64_image_url = (
        "data:image/png;base64,"
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
    )

    # Test with base64 image
    request_with_base64_image = {
        "model": "openai/gpt-5-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": base64_image_url,
                        },
                    },
                ],
            }
        ],
    }

    result = _convert_chat_request_to_responses_request(request_with_base64_image)

    assert "input" in result
    assert len(result["input"]) == 1
    assert result["input"][0]["role"] == "user"

    content = result["input"][0]["content"]
    assert len(content) == 2

    # First item should be text converted to input_text format
    assert content[0]["type"] == "input_text"
    assert content[0]["text"] == "What's in this image?"

    # Second item should be converted to input_image format, with the nested
    # {"url": ...} object flattened into a plain string.
    assert content[1]["type"] == "input_image"
    assert content[1]["image_url"] == base64_image_url

    # Test with URL image
    request_with_url_image = {
        "model": "openai/gpt-5-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://example.com/image.jpg",
                        },
                    }
                ],
            }
        ],
    }

    result = _convert_chat_request_to_responses_request(request_with_url_image)

    content = result["input"][0]["content"]
    assert len(content) == 1
    assert content[0]["type"] == "input_image"
    assert content[0]["image_url"] == "https://example.com/image.jpg"


def test_responses_api_with_image_input():
    """Check that an image message sent through `dspy.LM` reaches `litellm.responses` converted.

    `litellm.responses` is patched to return a canned response, so the test only
    inspects the keyword arguments the LM passes to it.
    """
    api_response = make_response(
        output_blocks=[
            ResponseOutputMessage(
                **{
                    "id": "msg_1",
                    "type": "message",
                    "role": "assistant",
                    "status": "completed",
                    "content": [
                        {"type": "output_text", "text": "This is a test answer with image input.", "annotations": []}
                    ],
                },
            ),
        ]
    )

    # Use a non-empty data URI: an empty string would also equal the converter's
    # fallback for a missing URL, so the final assertion could not detect a
    # broken extraction. The payload is a placeholder and is never decoded here.
    image_data_url = (
        "data:image/png;base64,"
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
    )

    with mock.patch("litellm.responses", autospec=True, return_value=api_response) as dspy_responses:
        lm = dspy.LM(
            model="openai/gpt-5-mini",
            model_type="responses",
            cache=False,
            temperature=1.0,
            max_tokens=16000,
        )

        # Test with messages containing an image
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data_url,
                        },
                    },
                ],
            }
        ]

        lm_result = lm(messages=messages)

        assert lm_result == [{"text": "This is a test answer with image input."}]

        dspy_responses.assert_called_once()
        call_args = dspy_responses.call_args.kwargs

        # Verify the request was converted correctly
        assert "input" in call_args
        content = call_args["input"][0]["content"]

        # Check that the text part was converted to input_text format
        text_content = [c for c in content if c.get("type") == "input_text"]
        assert len(text_content) == 1
        assert text_content[0]["text"] == "Describe this image"

        # Check that image was converted to input_image format
        image_content = [c for c in content if c.get("type") == "input_image"]
        assert len(image_content) == 1
        assert image_content[0]["image_url"] == image_data_url