Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -141,6 +143,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
    messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
    """
    Redact blob message parts by replacing their "content" value with
    SENSITIVE_DATA_SUBSTITUTE.

    Only messages that are dicts whose "content" is a list are inspected;
    within that list, every dict item with ``"type": "blob"`` gets its
    "content" replaced. Everything else is passed through untouched.

    The input is never mutated: messages that contain blob parts are
    shallow-copied (together with their content list and the affected
    items) before being redacted. If no blob part is found, the original
    ``messages`` object is returned as-is.

    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,..."
            }
        ]
    }
    becomes:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "[Filtered]"
            }
        ]
    }
    """

    def _is_blob(item: "Any") -> bool:
        return isinstance(item, dict) and item.get("type") == "blob"

    # Created lazily so the common "no blobs" case allocates nothing.
    redacted_messages = None

    for index, message in enumerate(messages):
        if not isinstance(message, dict):
            continue

        content = message.get("content")
        if not isinstance(content, list):
            continue

        if not any(_is_blob(item) for item in content):
            continue

        if redacted_messages is None:
            redacted_messages = list(messages)

        # Copy the message, its content list and each blob item so the
        # caller's objects keep their original payload.
        redacted_message = dict(message)
        redacted_message["content"] = [
            dict(item, content=SENSITIVE_DATA_SUBSTITUTE) if _is_blob(item) else item
            for item in content
        ]
        redacted_messages[index] = redacted_message

    return messages if redacted_messages is None else redacted_messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
Expand Down Expand Up @@ -186,6 +239,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
Expand Down
137 changes: 126 additions & 11 deletions sentry_sdk/integrations/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,104 @@ def _collect_ai_data(
return model, input_tokens, output_tokens, content_blocks


def _transform_content_block(content_block: "dict[str, Any]") -> "dict[str, Any]":
"""
Transform an Anthropic content block to a Sentry-compatible format.

Handles binary data (images, documents) by converting them to the standardized format:
- base64 encoded data -> type: "blob"
- URL references -> type: "uri"
- file_id references -> type: "file"
"""
block_type = content_block.get("type")

# Handle image blocks
if block_type == "image":
source = content_block.get("source", {})
source_type = source.get("type")
media_type = source.get("media_type", "")

if source_type == "base64":
return {
"type": "blob",
"modality": "image",
"mime_type": media_type,
"content": source.get("data", ""),
}
elif source_type == "url":
return {
"type": "uri",
"modality": "image",
"mime_type": media_type,
"uri": source.get("url", ""),
}
elif source_type == "file":
return {
"type": "file",
"modality": "image",
"mime_type": media_type,
"file_id": source.get("file_id", ""),
}

# Handle document blocks (PDFs, etc.)
elif block_type == "document":
source = content_block.get("source", {})
source_type = source.get("type")
media_type = source.get("media_type", "")

if source_type == "base64":
return {
"type": "blob",
"modality": "document",
"mime_type": media_type,
"content": source.get("data", ""),
}
elif source_type == "url":
return {
"type": "uri",
"modality": "document",
"mime_type": media_type,
"uri": source.get("url", ""),
}
elif source_type == "file":
return {
"type": "file",
"modality": "document",
"mime_type": media_type,
"file_id": source.get("file_id", ""),
}
elif source_type == "text":
# Plain text documents - keep as is but mark the type
return {
"type": "text",
"text": source.get("data", ""),
}

# For text blocks and other types, return as-is
return content_block


def _transform_message_content(
content: "Any",
) -> "Any":
"""
Transform message content, handling both string content and list of content blocks.
"""
if isinstance(content, str):
return content

if isinstance(content, (list, tuple)):
transformed = []
for block in content:
if isinstance(block, dict):
transformed.append(_transform_content_block(block))
else:
transformed.append(block)
return transformed

return content


def _set_input_data(
span: "Span", kwargs: "dict[str, Any]", integration: "AnthropicIntegration"
) -> None:
Expand Down Expand Up @@ -164,19 +262,36 @@ def _set_input_data(
and "content" in message
and isinstance(message["content"], (list, tuple))
):
transformed_content = []
for item in message["content"]:
if item.get("type") == "tool_result":
normalized_messages.append(
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL,
"content": { # type: ignore[dict-item]
"tool_use_id": item.get("tool_use_id"),
"output": item.get("content"),
},
}
)
# Skip tool_result items - they can contain images/documents
# with nested structures that are difficult to redact properly
if isinstance(item, dict) and item.get("type") == "tool_result":
continue

# Transform content blocks (images, documents, etc.)
transformed_content.append(
_transform_content_block(item)
if isinstance(item, dict)
else item
)

# If there are non-tool-result items, add them as a message
if transformed_content:
normalized_messages.append(
{
"role": message.get("role"),
"content": transformed_content,
}
)
else:
normalized_messages.append(message)
# Transform content for non-list messages or assistant messages
transformed_message = message.copy()
if "content" in transformed_message:
transformed_message["content"] = _transform_message_content(
transformed_message["content"]
)
normalized_messages.append(transformed_message)

role_normalized_messages = normalize_message_roles(normalized_messages)
scope = sentry_sdk.get_current_scope()
Expand Down
Loading
Loading