Merged
18 commits:
1f32952 fix(ai): redact message parts content of type blob (constantinius, Dec 17, 2025)
795bcea fix(ai): skip non dict messages (constantinius, Dec 17, 2025)
a623e13 fix(ai): typing (constantinius, Dec 17, 2025)
3d3ce5b fix(ai): content items may not be dicts (constantinius, Dec 17, 2025)
7fa0b37 fix(integrations): pydantic-ai: properly format binary input message … (constantinius, Dec 17, 2025)
3367599 fix: remove manual breakpoint() (constantinius, Dec 17, 2025)
704414c tests: add tests for message formatting (constantinius, Dec 17, 2025)
961947d Merge branch 'master' into constantinius/fix/integrations/pydantic-ai… (constantinius, Jan 13, 2026)
a488747 test: fix testcase (constantinius, Jan 13, 2026)
20e46fc Merge branch 'master' into constantinius/fix/integrations/pydantic-ai… (constantinius, Jan 14, 2026)
c971ed6 fix(integrations): pydantic-ai Skip base64 encoding for blob content … (constantinius, Jan 14, 2026)
bd78165 feat(ai): Add shared content transformation functions for multimodal … (constantinius, Jan 15, 2026)
1546a07 Merge shared content transformation functions (constantinius, Jan 15, 2026)
e7128e3 refactor(pydantic-ai): Use shared get_modality_from_mime_type from ai… (constantinius, Jan 15, 2026)
2fe4933 fix: missing binary content in invoke agent spans (constantinius, Jan 15, 2026)
412b93e refactor(ai): split transform_content_part into SDK-specific functions (constantinius, Jan 15, 2026)
8b977da Merge SDK-specific transform functions (constantinius, Jan 15, 2026)
401a018 Merge branch 'master' into constantinius/fix/integrations/pydantic-ai… (constantinius, Jan 16, 2026)
55 changes: 55 additions & 0 deletions sentry_sdk/ai/utils.py
@@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

@@ -141,6 +143,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
"""
    Redact blob message parts from the messages by replacing each blob part's
    "content" value with SENSITIVE_DATA_SUBSTITUTE, e.g.:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "[Filtered]"
}
]
}
"""

for message in messages:
if not isinstance(message, dict):
continue

content = message.get("content")
if isinstance(content, list):
for item in content:
if isinstance(item, dict) and item.get("type") == "blob":
item["content"] = SENSITIVE_DATA_SUBSTITUTE
return messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -186,6 +239,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
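
A minimal usage sketch of the new helper, using the message shape from its docstring; the list is mutated in place and returned, so the return value is the same object callers passed in:

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE
from sentry_sdk.ai.utils import redact_blob_message_parts

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "How many ponies do you see in the image?"},
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
            },
        ],
    }
]

redacted = redact_blob_message_parts(messages)

# The same list object is returned; text parts are untouched and only the
# blob part's "content" is replaced with "[Filtered]".
assert redacted is messages
assert messages[0]["content"][0]["text"] == "How many ponies do you see in the image?"
assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
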
26 changes: 24 additions & 2 deletions sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
@@ -1,5 +1,10 @@
import base64
import sentry_sdk
from sentry_sdk.ai.utils import set_data_normalized
from sentry_sdk.ai.utils import (
normalize_message_roles,
set_data_normalized,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.utils import safe_serialize

@@ -29,6 +34,7 @@
UserPromptPart,
TextPart,
ThinkingPart,
BinaryContent,
)
except ImportError:
# Fallback if these classes are not available
@@ -38,6 +44,7 @@
UserPromptPart = None
TextPart = None
ThinkingPart = None
BinaryContent = None


def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> None:
@@ -107,6 +114,16 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> None:
for item in part.content:
if isinstance(item, str):
content.append({"type": "text", "text": item})
elif BinaryContent and isinstance(item, BinaryContent):
content.append(
{
"type": "blob",
"modality": item.media_type.split("/")[0],
"mime_type": item.media_type,
"content": f"data:{item.media_type};base64,{base64.b64encode(item.data).decode('utf-8')}",
}
)
else:
content.append(safe_serialize(item))
else:
@@ -124,8 +141,13 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> None:
formatted_messages.append(message)

if formatted_messages:
normalized_messages = normalize_message_roles(formatted_messages)
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(
normalized_messages, span, scope
)
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, formatted_messages, unpack=False
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
)
except Exception:
# If we fail to format messages, just skip it
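
For reference, a standalone sketch of the BinaryContent branch above. It assumes pydantic-ai's BinaryContent(data=..., media_type=...) constructor; the integration itself only relies on the .data and .media_type attributes shown in the diff, and the bytes below are placeholder data:

import base64

from pydantic_ai import BinaryContent  # assumed top-level export from pydantic-ai

item = BinaryContent(data=b"\x89PNG\r\n\x1a\n", media_type="image/png")

# Mirrors the transformation in _set_input_messages: binary user input is
# recorded as a "blob" content part carrying a base64 data URI.
blob_part = {
    "type": "blob",
    "modality": item.media_type.split("/")[0],  # "image"
    "mime_type": item.media_type,  # "image/png"
    "content": "data:{};base64,{}".format(
        item.media_type, base64.b64encode(item.data).decode("utf-8")
    ),
}
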
14 changes: 12 additions & 2 deletions sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
@@ -1,5 +1,10 @@
import sentry_sdk
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.ai.utils import (
get_start_span_function,
normalize_message_roles,
set_data_normalized,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA

from ..consts import SPAN_ORIGIN
@@ -102,8 +107,13 @@ def invoke_agent_span(
)

if messages:
normalized_messages = normalize_message_roles(messages)
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(
normalized_messages, span, scope
)
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
)

return span
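
Both span helpers now share the same recording pattern: normalize message roles, run them through truncate_and_annotate_messages (which redacts blob parts, truncates oversized payloads, and returns None when there is nothing to record), and only then attach the result to the span. A rough end-to-end sketch, assuming an SDK initialized for tracing and an illustrative op string for a manually started span:

import sentry_sdk
from sentry_sdk.ai.utils import (
    normalize_message_roles,
    set_data_normalized,
    truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA

sentry_sdk.init(traces_sample_rate=1.0)

messages = [{"role": "user", "content": "Describe the attached image."}]

with sentry_sdk.start_span(op="gen_ai.invoke_agent") as span:
    normalized_messages = normalize_message_roles(messages)
    scope = sentry_sdk.get_current_scope()
    # Redaction and truncation happen before anything is attached to the span.
    messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
    if messages_data is not None:
        set_data_normalized(
            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
        )
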
2 changes: 1 addition & 1 deletion sentry_sdk/integrations/pydantic_ai/spans/utils.py
@@ -6,7 +6,7 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Union
from typing import Union, Dict, Any, List
from pydantic_ai.usage import RequestUsage, RunUsage # type: ignore


106 changes: 105 additions & 1 deletion tests/test_ai_monitoring.py
@@ -4,7 +4,7 @@
import pytest

import sentry_sdk
from sentry_sdk._types import AnnotatedValue
from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE
from sentry_sdk.ai.monitoring import ai_track
from sentry_sdk.ai.utils import (
MAX_GEN_AI_MESSAGE_BYTES,
@@ -13,6 +13,7 @@
truncate_and_annotate_messages,
truncate_messages_by_size,
_find_truncation_index,
redact_blob_message_parts,
)
from sentry_sdk.serializer import serialize
from sentry_sdk.utils import safe_serialize
@@ -542,3 +543,106 @@ def __init__(self):
assert isinstance(messages_value, AnnotatedValue)
assert messages_value.metadata["len"] == stored_original_length
assert len(messages_value.value) == len(truncated_messages)


class TestRedactBlobMessageParts:
def test_redacts_single_blob_content(self):
"""Test that blob content is redacted in a message with single blob part"""
messages = [
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text",
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
},
],
}
]

result = redact_blob_message_parts(messages)

assert result == messages # Returns the same list
assert (
messages[0]["content"][0]["text"]
== "How many ponies do you see in the image?"
)
assert messages[0]["content"][0]["type"] == "text"
assert messages[0]["content"][1]["type"] == "blob"
assert messages[0]["content"][1]["modality"] == "image"
assert messages[0]["content"][1]["mime_type"] == "image/jpeg"
assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE

def test_redacts_multiple_blob_parts(self):
"""Test that multiple blob parts in a single message are all redacted"""
messages = [
{
"role": "user",
"content": [
{"text": "Compare these images", "type": "text"},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,first_image",
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/png",
"content": "data:image/png;base64,second_image",
},
],
}
]

result = redact_blob_message_parts(messages)

assert result == messages
assert messages[0]["content"][0]["text"] == "Compare these images"
assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE

def test_redacts_blobs_in_multiple_messages(self):
"""Test that blob parts are redacted across multiple messages"""
messages = [
{
"role": "user",
"content": [
{"text": "First message", "type": "text"},
{
"type": "blob",
"modality": "image",
"content": "data:image/jpeg;base64,first",
},
],
},
{
"role": "assistant",
"content": "I see the image.",
},
{
"role": "user",
"content": [
{"text": "Second message", "type": "text"},
{
"type": "blob",
"modality": "image",
"content": "data:image/jpeg;base64,second",
},
],
},
]

result = redact_blob_message_parts(messages)

assert result == messages
assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
assert messages[1]["content"] == "I see the image." # Unchanged
assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE