Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
1f32952
fix(ai): redact message parts content of type blob
constantinius Dec 17, 2025
795bcea
fix(ai): skip non dict messages
constantinius Dec 17, 2025
a623e13
fix(ai): typing
constantinius Dec 17, 2025
3d3ce5b
fix(ai): content items may not be dicts
constantinius Dec 17, 2025
36fcaf9
fix(litellm): fix `gen_ai.request.messages` to be as expected
constantinius Dec 17, 2025
d9d1264
tests: add tests for litellm message conversion
constantinius Dec 17, 2025
4a17806
fix(integrations): ensure _convert_message_parts does not mutate orig…
constantinius Jan 8, 2026
db071c2
Merge branch 'master' into constantinius/fix/integrations/litellm-rep…
constantinius Jan 13, 2026
280202f
fix: addressing review comments and fix test
constantinius Jan 13, 2026
97cc614
Merge branch 'master' into constantinius/fix/integrations/litellm-rep…
constantinius Jan 14, 2026
8cde746
fix(integrations): using common function to parse data URIs
constantinius Jan 14, 2026
bbab566
fix: litellm test errors
constantinius Jan 15, 2026
869cb42
fix: docstring
constantinius Jan 15, 2026
bd78165
feat(ai): Add shared content transformation functions for multimodal …
constantinius Jan 15, 2026
c2aac53
Merge shared content transformation functions
constantinius Jan 15, 2026
026992f
refactor(litellm): Use shared transform_message_content from ai/utils
constantinius Jan 15, 2026
412b93e
refactor(ai): split transform_content_part into SDK-specific functions
constantinius Jan 15, 2026
15c63ff
Merge SDK-specific transform functions
constantinius Jan 15, 2026
88c04f9
refactor(litellm): use transform_openai_content_part directly
constantinius Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -141,6 +143,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
"""
Redact blob message parts from the messages, by removing the "content" key.
e.g:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "[Filtered]"
}
]
}
"""

for message in messages:
if not isinstance(message, dict):
continue

content = message.get("content")
if isinstance(content, list):
for item in content:
if isinstance(item, dict) and item.get("type") == "blob":
item["content"] = SENSITIVE_DATA_SUBSTITUTE
return messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
Expand Down Expand Up @@ -186,6 +239,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
Expand Down
65 changes: 64 additions & 1 deletion sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
from typing import Any, Dict
from typing import Any, Dict, List
from datetime import datetime

try:
Expand All @@ -35,6 +35,68 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
return metadata


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
if image_url.get("url", "").startswith("data:"):
return {
"type": "blob",
"modality": "image",
"mime_type": item["image_url"]["url"].split(";base64,")[0],
"content": item["image_url"]["url"].split(";base64,")[1],
}
else:
return {
"type": "uri",
"uri": item["image_url"]["url"],
}
return item

for message in messages:
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
return messages


def _input_callback(kwargs: "Dict[str, Any]") -> None:
"""Handle the start of a request."""
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
Expand Down Expand Up @@ -101,6 +163,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
messages = kwargs.get("messages", [])
if messages:
scope = sentry_sdk.get_current_scope()
messages = _convert_message_parts(messages)
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
set_data_normalized(
Expand Down
106 changes: 105 additions & 1 deletion tests/test_ai_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

import sentry_sdk
from sentry_sdk._types import AnnotatedValue
from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE
from sentry_sdk.ai.monitoring import ai_track
from sentry_sdk.ai.utils import (
MAX_GEN_AI_MESSAGE_BYTES,
Expand All @@ -13,6 +13,7 @@
truncate_and_annotate_messages,
truncate_messages_by_size,
_find_truncation_index,
redact_blob_message_parts,
)
from sentry_sdk.serializer import serialize
from sentry_sdk.utils import safe_serialize
Expand Down Expand Up @@ -542,3 +543,106 @@ def __init__(self):
assert isinstance(messages_value, AnnotatedValue)
assert messages_value.metadata["len"] == stored_original_length
assert len(messages_value.value) == len(truncated_messages)


class TestRedactBlobMessageParts:
def test_redacts_single_blob_content(self):
"""Test that blob content is redacted in a message with single blob part"""
messages = [
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text",
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
},
],
}
]

result = redact_blob_message_parts(messages)

assert result == messages # Returns the same list
assert (
messages[0]["content"][0]["text"]
== "How many ponies do you see in the image?"
)
assert messages[0]["content"][0]["type"] == "text"
assert messages[0]["content"][1]["type"] == "blob"
assert messages[0]["content"][1]["modality"] == "image"
assert messages[0]["content"][1]["mime_type"] == "image/jpeg"
assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE

def test_redacts_multiple_blob_parts(self):
"""Test that multiple blob parts in a single message are all redacted"""
messages = [
{
"role": "user",
"content": [
{"text": "Compare these images", "type": "text"},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,first_image",
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/png",
"content": "data:image/png;base64,second_image",
},
],
}
]

result = redact_blob_message_parts(messages)

assert result == messages
assert messages[0]["content"][0]["text"] == "Compare these images"
assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE

def test_redacts_blobs_in_multiple_messages(self):
"""Test that blob parts are redacted across multiple messages"""
messages = [
{
"role": "user",
"content": [
{"text": "First message", "type": "text"},
{
"type": "blob",
"modality": "image",
"content": "data:image/jpeg;base64,first",
},
],
},
{
"role": "assistant",
"content": "I see the image.",
},
{
"role": "user",
"content": [
{"text": "Second message", "type": "text"},
{
"type": "blob",
"modality": "image",
"content": "data:image/jpeg;base64,second",
},
],
},
]

result = redact_blob_message_parts(messages)

assert result == messages
assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
assert messages[1]["content"] == "I see the image." # Unchanged
assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
Loading