From 1f32952d0066a9dc1ff1482cef48c3cbe0acb663 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 10:45:45 +0100 Subject: [PATCH 1/9] fix(ai): redact message parts content of type blob --- sentry_sdk/ai/utils.py | 51 +++++++++++++++++ tests/test_ai_monitoring.py | 106 +++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1d2b4483c9..73155b0305 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -5,6 +5,8 @@ from sys import getsizeof from typing import TYPE_CHECKING +from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE + if TYPE_CHECKING: from typing import Any, Callable, Dict, List, Optional, Tuple @@ -141,6 +143,53 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 +def redact_blob_message_parts(messages): + # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] + """ + Redact blob message parts from the messages, by removing the "content" key. + e.g: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,..." + } + ] + } + becomes: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "[Filtered]" + } + ] + } + """ + + for message in messages: + content = message.get("content") + if isinstance(content, list): + for item in content: + if item.get("type") == "blob": + item["content"] = SENSITIVE_DATA_SUBSTITUTE + return messages + + def truncate_messages_by_size( messages: "List[Dict[str, Any]]", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, @@ -186,6 +235,8 @@ def truncate_and_annotate_messages( if not messages: return None + messages = redact_blob_message_parts(messages) + truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) if removed_count > 0: scope._gen_ai_original_message_count[span.span_id] = len(messages) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 8d3d4ba204..e9f3712cd3 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -4,7 +4,7 @@ import pytest import sentry_sdk -from sentry_sdk._types import AnnotatedValue +from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( MAX_GEN_AI_MESSAGE_BYTES, @@ -13,6 +13,7 @@ truncate_and_annotate_messages, truncate_messages_by_size, _find_truncation_index, + redact_blob_message_parts, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -542,3 +543,106 @@ def __init__(self): assert isinstance(messages_value, AnnotatedValue) assert messages_value.metadata["len"] == stored_original_length assert len(messages_value.value) == len(truncated_messages) + + +class TestRedactBlobMessageParts: + def test_redacts_single_blob_content(self): + """Test that blob content is redacted in a message with single blob part""" + messages = [ + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages # Returns the same list + assert ( + messages[0]["content"][0]["text"] + == "How many ponies do you see in the image?" + ) + assert messages[0]["content"][0]["type"] == "text" + assert messages[0]["content"][1]["type"] == "blob" + assert messages[0]["content"][1]["modality"] == "image" + assert messages[0]["content"][1]["mime_type"] == "image/jpeg" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_multiple_blob_parts(self): + """Test that multiple blob parts in a single message are all redacted""" + messages = [ + { + "role": "user", + "content": [ + {"text": "Compare these images", "type": "text"}, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "_image", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "_image", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][0]["text"] == "Compare these images" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_blobs_in_multiple_messages(self): + """Test that blob parts are redacted across multiple messages""" + messages = [ + { + "role": "user", + "content": [ + {"text": "First message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "", + }, + ], + }, + { + "role": "assistant", + "content": "I see the image.", + }, + { + "role": "user", + "content": [ + {"text": "Second message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "", + }, + ], + }, + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[1]["content"] == "I see the image." # Unchanged + assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE From 795bcea241f7777e646a4da14c870a3049bdbe90 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:05:04 +0100 Subject: [PATCH 2/9] fix(ai): skip non dict messages --- sentry_sdk/ai/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 73155b0305..ae507e898b 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -182,6 +182,9 @@ def redact_blob_message_parts(messages): """ for message in messages: + if not isinstance(message, dict): + continue + content = message.get("content") if isinstance(content, list): for item in content: From a623e137d26e982c0d85258256c0ba013f9ecb24 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:21:43 +0100 Subject: [PATCH 3/9] fix(ai): typing --- sentry_sdk/ai/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index ae507e898b..1b61c7a113 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -143,8 +143,9 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 -def redact_blob_message_parts(messages): - # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] +def redact_blob_message_parts( + messages: "List[Dict[str, Any]]", +) -> "List[Dict[str, Any]]": """ Redact blob message parts from the messages, by removing the "content" key. e.g: From 3d3ce5bbdca43f14194edbbbee11d3b6dcd6d8a3 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:37:12 +0100 Subject: [PATCH 4/9] fix(ai): content items may not be dicts --- sentry_sdk/ai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1b61c7a113..78a64ab737 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -189,7 +189,7 @@ def redact_blob_message_parts( content = message.get("content") if isinstance(content, list): for item in content: - if item.get("type") == "blob": + if isinstance(item, dict) and item.get("type") == "blob": item["content"] = SENSITIVE_DATA_SUBSTITUTE return messages From 36fcaf9df158819d9589b86ef73781286badbeae Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 17:24:05 +0100 Subject: [PATCH 5/9] fix(litellm): fix `gen_ai.request.messages` to be as expected --- sentry_sdk/integrations/litellm.py | 65 +++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 08cb217962..1e3f18ddf6 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -14,7 +14,7 @@ from sentry_sdk.utils import event_from_exception if TYPE_CHECKING: - from typing import Any, Dict + from typing import Any, Dict, List from datetime import datetime try: @@ -35,6 +35,68 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]": return metadata +def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]": + """ + Convert the message parts from OpenAI format to the `gen_ai.request.messages` format. + e.g: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,...", + "detail": "high" + } + } + ] + } + becomes: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,..." + } + ] + } + """ + + def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": + if item.get("type") == "image_url": + image_url = item.get("image_url") or {} + if image_url.get("url", "").startswith("data:"): + return { + "type": "blob", + "modality": "image", + "mime_type": item["image_url"]["url"].split(";base64,")[0], + "content": item["image_url"]["url"].split(";base64,")[1], + } + else: + return { + "type": "uri", + "uri": item["image_url"]["url"], + } + return item + + for message in messages: + content = message.get("content") + if isinstance(content, list): + message["content"] = [_map_item(item) for item in content] + return messages + + def _input_callback(kwargs: "Dict[str, Any]") -> None: """Handle the start of a request.""" integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration) @@ -101,6 +163,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: messages = kwargs.get("messages", []) if messages: scope = sentry_sdk.get_current_scope() + messages = _convert_message_parts(messages) messages_data = truncate_and_annotate_messages(messages, span, scope) if messages_data is not None: set_data_normalized( From d9d12648f635f00abbcda2b516c1d2acda1fc625 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 17:30:17 +0100 Subject: [PATCH 6/9] tests: add tests for litellm message conversion --- tests/integrations/litellm/test_litellm.py | 161 +++++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 1b925fb61f..4f81c54b63 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -1,3 +1,4 @@ +import base64 import json import pytest import time @@ -23,6 +24,7 @@ async def __call__(self, *args, **kwargs): from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.litellm import ( LiteLLMIntegration, + _convert_message_parts, _input_callback, _success_callback, _failure_callback, @@ -753,3 +755,162 @@ def test_litellm_message_truncation(sentry_init, capture_events): assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 + + +IMAGE_DATA = b"fake_image_data_12345" +IMAGE_B64 = base64.b64encode(IMAGE_DATA).decode("utf-8") +IMAGE_DATA_URI = f"data:image/png;base64,{IMAGE_B64}" + + +def test_binary_content_encoding_image_url(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Look at this image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI, "detail": "high"}, + }, + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + blob_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "blob" + ), + None, + ) + assert blob_item is not None + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "data:image/png" + assert IMAGE_B64 in blob_item["content"] or "[Filtered]" in str( + blob_item["content"] + ) + + +def test_binary_content_encoding_mixed_content(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Here is an image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI}, + }, + {"type": "text", "text": "What do you see?"}, + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + content_items = [ + item for msg in messages_data if "content" in msg for item in msg["content"] + ] + assert any(item.get("type") == "text" for item in content_items) + assert any(item.get("type") == "blob" for item in content_items) + + +def test_binary_content_encoding_uri_type(sentry_init, capture_events): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + ], + } + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = {"model": "gpt-4-vision-preview", "messages": messages} + _input_callback(kwargs) + _success_callback(kwargs, mock_response, datetime.now(), datetime.now()) + + (event,) = events + (span,) = event["spans"] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + uri_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "uri" + ), + None, + ) + assert uri_item is not None + assert uri_item["uri"] == "https://example.com/image.jpg" + + +def test_convert_message_parts_direct(): + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Hello"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + blob_item = next( + item for item in converted[0]["content"] if item.get("type") == "blob" + ) + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "data:image/png" + assert IMAGE_B64 in blob_item["content"] From 4a178068544669cbc09bf5cccd6522311604a6c3 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 8 Jan 2026 15:03:17 +0100 Subject: [PATCH 7/9] fix(integrations): ensure _convert_message_parts does not mutate original messages and handle data URLs correctly --- sentry_sdk/integrations/litellm.py | 17 +++-- tests/integrations/litellm/test_litellm.py | 80 +++++++++++++++++++++- 2 files changed, 90 insertions(+), 7 deletions(-) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 1e3f18ddf6..51a0882ea2 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -1,3 +1,4 @@ +import copy from typing import TYPE_CHECKING import sentry_sdk @@ -72,21 +73,27 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, ] } """ + # Deep copy to avoid mutating original messages from kwargs + messages = copy.deepcopy(messages) def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": if item.get("type") == "image_url": image_url = item.get("image_url") or {} - if image_url.get("url", "").startswith("data:"): + url = image_url.get("url", "") + if url.startswith("data:") and ";base64," in url: + parts = url.split(";base64,", 1) + # Remove "data:" prefix (5 chars) to get proper MIME type + mime_type = parts[0][5:] return { "type": "blob", "modality": "image", - "mime_type": item["image_url"]["url"].split(";base64,")[0], - "content": item["image_url"]["url"].split(";base64,")[1], + "mime_type": mime_type, + "content": parts[1], } - else: + elif url: return { "type": "uri", - "uri": item["image_url"]["url"], + "uri": url, } return item diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 4f81c54b63..bac90d02ec 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -805,7 +805,7 @@ def test_binary_content_encoding_image_url(sentry_init, capture_events): ) assert blob_item is not None assert blob_item["modality"] == "image" - assert blob_item["mime_type"] == "data:image/png" + assert blob_item["mime_type"] == "image/png" assert IMAGE_B64 in blob_item["content"] or "[Filtered]" in str( blob_item["content"] ) @@ -912,5 +912,81 @@ def test_convert_message_parts_direct(): item for item in converted[0]["content"] if item.get("type") == "blob" ) assert blob_item["modality"] == "image" - assert blob_item["mime_type"] == "data:image/png" + assert blob_item["mime_type"] == "image/png" assert IMAGE_B64 in blob_item["content"] + + +def test_convert_message_parts_does_not_mutate_original(): + """Ensure _convert_message_parts does not mutate the original messages.""" + original_url = IMAGE_DATA_URI + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": original_url}, + }, + ], + } + ] + _convert_message_parts(messages) + # Original should be unchanged + assert messages[0]["content"][0]["type"] == "image_url" + assert messages[0]["content"][0]["image_url"]["url"] == original_url + + +def test_convert_message_parts_data_url_without_base64(): + """Data URLs without ;base64, marker should be treated as regular URIs.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "data:image/png,rawdata"}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + uri_item = converted[0]["content"][0] + # Should be converted to uri type, not blob (since no base64 encoding) + assert uri_item["type"] == "uri" + assert uri_item["uri"] == "data:image/png,rawdata" + + +def test_convert_message_parts_image_url_none(): + """image_url being None should not crash.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": None, + }, + ], + } + ] + converted = _convert_message_parts(messages) + # Should return item unchanged + assert converted[0]["content"][0]["type"] == "image_url" + + +def test_convert_message_parts_image_url_missing_url(): + """image_url missing the url key should not crash.""" + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"detail": "high"}, + }, + ], + } + ] + converted = _convert_message_parts(messages) + # Should return item unchanged + assert converted[0]["content"][0]["type"] == "image_url" From 280202fe9f171874c8ab0806e4188a149e861de5 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 13 Jan 2026 14:08:41 +0100 Subject: [PATCH 8/9] fix: addressing review comments and fix test --- sentry_sdk/integrations/litellm.py | 4 ++++ tests/integrations/litellm/test_litellm.py | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index d7fd4ddfd2..4261e22124 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -78,6 +78,8 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, messages = copy.deepcopy(messages) def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": + if not isinstance(item, dict): + return item if item.get("type") == "image_url": image_url = item.get("image_url") or {} url = image_url.get("url", "") @@ -99,6 +101,8 @@ def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": return item for message in messages: + if not isinstance(message, dict): + continue content = message.get("content") if isinstance(content, list): message["content"] = [_map_item(item) for item in content] diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index bac90d02ec..68d9898165 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -22,6 +22,7 @@ async def __call__(self, *args, **kwargs): import sentry_sdk from sentry_sdk import start_transaction from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from sentry_sdk.integrations.litellm import ( LiteLLMIntegration, _convert_message_parts, @@ -806,8 +807,9 @@ def test_binary_content_encoding_image_url(sentry_init, capture_events): assert blob_item is not None assert blob_item["modality"] == "image" assert blob_item["mime_type"] == "image/png" - assert IMAGE_B64 in blob_item["content"] or "[Filtered]" in str( - blob_item["content"] + assert ( + IMAGE_B64 in blob_item["content"] + or blob_item["content"] == BLOB_DATA_SUBSTITUTE ) From 8cde746f57b8076129cee0fcf499796b6ebcb8b1 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 14 Jan 2026 16:55:21 +0100 Subject: [PATCH 9/9] fix(integrations): using common function to parse data URIs --- sentry_sdk/integrations/litellm.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 4261e22124..b1bb50d326 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -6,6 +6,7 @@ from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.ai.utils import ( get_start_span_function, + parse_data_uri, set_data_normalized, truncate_and_annotate_messages, ) @@ -83,17 +84,18 @@ def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]": if item.get("type") == "image_url": image_url = item.get("image_url") or {} url = image_url.get("url", "") - if url.startswith("data:") and ";base64," in url: - parts = url.split(";base64,", 1) - # Remove "data:" prefix (5 chars) to get proper MIME type - mime_type = parts[0][5:] - return { - "type": "blob", - "modality": "image", - "mime_type": mime_type, - "content": parts[1], - } - elif url: + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": "image", + "mime_type": mime_type, + "content": content, + } + except ValueError: + pass + if url: return { "type": "uri", "uri": url,