Merged
Changes from 6 commits
Commits (19)
1f32952
fix(ai): redact message parts content of type blob
constantinius Dec 17, 2025
795bcea
fix(ai): skip non dict messages
constantinius Dec 17, 2025
a623e13
fix(ai): typing
constantinius Dec 17, 2025
3d3ce5b
fix(ai): content items may not be dicts
constantinius Dec 17, 2025
36fcaf9
fix(litellm): fix `gen_ai.request.messages` to be as expected
constantinius Dec 17, 2025
d9d1264
tests: add tests for litellm message conversion
constantinius Dec 17, 2025
4a17806
fix(integrations): ensure _convert_message_parts does not mutate orig…
constantinius Jan 8, 2026
db071c2
Merge branch 'master' into constantinius/fix/integrations/litellm-rep…
constantinius Jan 13, 2026
280202f
fix: addressing review comments and fix test
constantinius Jan 13, 2026
97cc614
Merge branch 'master' into constantinius/fix/integrations/litellm-rep…
constantinius Jan 14, 2026
8cde746
fix(integrations): using common function to parse data URIs
constantinius Jan 14, 2026
bbab566
fix: litellm test errors
constantinius Jan 15, 2026
869cb42
fix: docstring
constantinius Jan 15, 2026
bd78165
feat(ai): Add shared content transformation functions for multimodal …
constantinius Jan 15, 2026
c2aac53
Merge shared content transformation functions
constantinius Jan 15, 2026
026992f
refactor(litellm): Use shared transform_message_content from ai/utils
constantinius Jan 15, 2026
412b93e
refactor(ai): split transform_content_part into SDK-specific functions
constantinius Jan 15, 2026
15c63ff
Merge SDK-specific transform functions
constantinius Jan 15, 2026
88c04f9
refactor(litellm): use transform_openai_content_part directly
constantinius Jan 15, 2026
55 changes: 55 additions & 0 deletions sentry_sdk/ai/utils.py
@@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

@@ -141,6 +143,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
"""
Redact blob message parts from the messages by replacing their "content" value with a placeholder.
e.g.:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "[Filtered]"
}
]
}
"""

for message in messages:
if not isinstance(message, dict):
continue

content = message.get("content")
if isinstance(content, list):
for item in content:
if isinstance(item, dict) and item.get("type") == "blob":
item["content"] = SENSITIVE_DATA_SUBSTITUTE
return messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -186,6 +239,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
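
For reference, a minimal usage sketch of the new redaction helper (a hypothetical standalone snippet, assuming this change is applied and that SENSITIVE_DATA_SUBSTITUTE is the "[Filtered]" placeholder):

# Sketch only: exercises redact_blob_message_parts as added above.
from sentry_sdk.ai.utils import redact_blob_message_parts

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "How many ponies do you see in the image?"},
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,...",
            },
        ],
    }
]

redacted = redact_blob_message_parts(messages)
# Only dict items with type == "blob" are touched; text parts stay intact.
assert redacted[0]["content"][0]["text"] == "How many ponies do you see in the image?"
assert redacted[0]["content"][1]["content"] == "[Filtered]"  # assumes SENSITIVE_DATA_SUBSTITUTE == "[Filtered]"
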
65 changes: 64 additions & 1 deletion sentry_sdk/integrations/litellm.py
@@ -14,7 +14,7 @@
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
from typing import Any, Dict
from typing import Any, Dict, List
from datetime import datetime

try:
@@ -35,6 +35,68 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
return metadata


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g.:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
if image_url.get("url", "").startswith("data:"):
return {
"type": "blob",
"modality": "image",
"mime_type": item["image_url"]["url"].split(";base64,")[0],
"content": item["image_url"]["url"].split(";base64,")[1],
}
else:
return {
"type": "uri",
"uri": item["image_url"]["url"],
}
return item

for message in messages:
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
return messages


def _input_callback(kwargs: "Dict[str, Any]") -> None:
"""Handle the start of a request."""
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
@@ -101,6 +163,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
messages = kwargs.get("messages", [])
if messages:
scope = sentry_sdk.get_current_scope()
messages = _convert_message_parts(messages)
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
set_data_normalized(
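
A minimal sketch of the conversion in isolation (a hypothetical snippet, assuming the litellm package is installed; _convert_message_parts is a private helper at this commit, and the split above keeps the "data:" prefix in mime_type):

# Sketch only: an OpenAI-style image_url part with a data URI becomes a blob part.
from sentry_sdk.integrations.litellm import _convert_message_parts

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Look at this image:"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
        ],
    }
]

converted = _convert_message_parts(messages)
blob = converted[0]["content"][1]
# split(";base64,")[0] retains the "data:" scheme, so mime_type is "data:image/png" here.
assert blob == {
    "type": "blob",
    "modality": "image",
    "mime_type": "data:image/png",
    "content": "AAAA",
}
# Non-data URLs are mapped to {"type": "uri", "uri": ...} instead.
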
161 changes: 161 additions & 0 deletions tests/integrations/litellm/test_litellm.py
@@ -1,3 +1,4 @@
import base64
import json
import pytest
import time
@@ -23,6 +24,7 @@ async def __call__(self, *args, **kwargs):
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations.litellm import (
LiteLLMIntegration,
_convert_message_parts,
_input_callback,
_success_callback,
_failure_callback,
@@ -753,3 +755,162 @@ def test_litellm_message_truncation(sentry_init, capture_events):
assert "small message 4" in str(parsed_messages[0])
assert "small message 5" in str(parsed_messages[1])
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5


IMAGE_DATA = b"fake_image_data_12345"
IMAGE_B64 = base64.b64encode(IMAGE_DATA).decode("utf-8")
IMAGE_DATA_URI = f"data:image/png;base64,{IMAGE_B64}"


def test_binary_content_encoding_image_url(sentry_init, capture_events):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Look at this image:"},
{
"type": "image_url",
"image_url": {"url": IMAGE_DATA_URI, "detail": "high"},
},
],
}
]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
_input_callback(kwargs)
_success_callback(kwargs, mock_response, datetime.now(), datetime.now())

(event,) = events
(span,) = event["spans"]
messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

blob_item = next(
(
item
for msg in messages_data
if "content" in msg
for item in msg["content"]
if item.get("type") == "blob"
),
None,
)
assert blob_item is not None
assert blob_item["modality"] == "image"
assert blob_item["mime_type"] == "data:image/png"
assert IMAGE_B64 in blob_item["content"] or "[Filtered]" in str(
blob_item["content"]
)


def test_binary_content_encoding_mixed_content(sentry_init, capture_events):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Here is an image:"},
{
"type": "image_url",
"image_url": {"url": IMAGE_DATA_URI},
},
{"type": "text", "text": "What do you see?"},
],
}
]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
_input_callback(kwargs)
_success_callback(kwargs, mock_response, datetime.now(), datetime.now())

(event,) = events
(span,) = event["spans"]
messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

content_items = [
item for msg in messages_data if "content" in msg for item in msg["content"]
]
assert any(item.get("type") == "text" for item in content_items)
assert any(item.get("type") == "blob" for item in content_items)


def test_binary_content_encoding_uri_type(sentry_init, capture_events):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
traces_sample_rate=1.0,
send_default_pii=True,
)
events = capture_events()

messages = [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": "https://example.com/image.jpg"},
}
],
}
]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
kwargs = {"model": "gpt-4-vision-preview", "messages": messages}
_input_callback(kwargs)
_success_callback(kwargs, mock_response, datetime.now(), datetime.now())

(event,) = events
(span,) = event["spans"]
messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])

uri_item = next(
(
item
for msg in messages_data
if "content" in msg
for item in msg["content"]
if item.get("type") == "uri"
),
None,
)
assert uri_item is not None
assert uri_item["uri"] == "https://example.com/image.jpg"


def test_convert_message_parts_direct():
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Hello"},
{
"type": "image_url",
"image_url": {"url": IMAGE_DATA_URI},
},
],
}
]
converted = _convert_message_parts(messages)
blob_item = next(
item for item in converted[0]["content"] if item.get("type") == "blob"
)
assert blob_item["modality"] == "image"
assert blob_item["mime_type"] == "data:image/png"
assert IMAGE_B64 in blob_item["content"]