feat(bedrock): added support for tool results, definitions (#14371)

maxzhangdd · web-flow · commit d6866c457445 · 2025-08-20T20:46:02.000Z
This PR adds improved tracking of tool results and definitions to the LLMObs Bedrock integration.

## Checklist
- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
diff --git a/ddtrace/contrib/internal/botocore/services/bedrock.py b/ddtrace/contrib/internal/botocore/services/bedrock.py
@@ -183,12 +183,14 @@ def _extract_request_params_for_converse(params: Dict[str, Any]) -> Dict[str, An
     if system_content_block:
         prompt.append({"role": "system", "content": system_content_block})
     prompt += messages
+    tool_config = params.get("toolConfig", {})
     return {
         "prompt": prompt,
         "temperature": inference_config.get("temperature", ""),
         "top_p": inference_config.get("topP", ""),
         "max_tokens": inference_config.get("maxTokens", ""),
         "stop_sequences": inference_config.get("stopSequences", []),
+        "tool_config": tool_config,
     }
 
 
diff --git a/ddtrace/llmobs/_integrations/bedrock.py b/ddtrace/llmobs/_integrations/bedrock.py
@@ -23,6 +23,7 @@
 from ddtrace.llmobs._constants import PROXY_REQUEST
 from ddtrace.llmobs._constants import SPAN_KIND
 from ddtrace.llmobs._constants import TAGS
+from ddtrace.llmobs._constants import TOOL_DEFINITIONS
 from ddtrace.llmobs._integrations import BaseLLMIntegration
 from ddtrace.llmobs._integrations.bedrock_agents import _create_or_update_bedrock_trace_step_span
 from ddtrace.llmobs._integrations.bedrock_agents import _extract_trace_step_id
@@ -32,7 +33,9 @@
 from ddtrace.llmobs._integrations.utils import get_messages_from_converse_content
 from ddtrace.llmobs._integrations.utils import update_proxy_workflow_input_output_value
 from ddtrace.llmobs._telemetry import record_bedrock_agent_span_event_created
+from ddtrace.llmobs._utils import _get_attr
 from ddtrace.llmobs._writer import LLMObsSpanEvent
+from ddtrace.llmobs.utils import ToolDefinition
 from ddtrace.trace import Span
 
 
@@ -97,6 +100,10 @@ def _llmobs_set_tags(
             metadata["max_tokens"] = int(request_params.get("max_tokens") or 0)
 
         prompt = request_params.get("prompt", "")
+        tool_config = request_params.get("tool_config", {})
+        tool_definitions = self._extract_tool_definitions(tool_config)
+        if tool_definitions:
+            span._set_ctx_item(TOOL_DEFINITIONS, tool_definitions)
 
         is_converse = ctx["resource"] in ("Converse", "ConverseStream")
         input_messages = (
@@ -381,3 +388,17 @@ def _tag_proxy_request(self, ctx: core.ExecutionContext) -> None:
         base_url = self._get_base_url(instance=ctx.get_item("instance"))
         if self._is_instrumented_proxy_url(base_url):
             ctx.set_item(PROXY_REQUEST, True)
+
+    def _extract_tool_definitions(self, tool_config: Dict[str, Any]) -> List[ToolDefinition]:
+        """Extract tool definitions from the stored tool config."""
+        tools = _get_attr(tool_config, "tools", [])
+        tool_definitions = []
+        for tool in tools:
+            tool_spec = _get_attr(tool, "toolSpec", {})
+            tool_definition_info = ToolDefinition(
+                name=_get_attr(tool_spec, "name", ""),
+                description=_get_attr(tool_spec, "description", ""),
+                schema=_get_attr(tool_spec, "inputSchema", {}),
+            )
+            tool_definitions.append(tool_definition_info)
+        return tool_definitions
diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
@@ -27,6 +27,8 @@
 from ddtrace.llmobs._utils import _get_attr
 from ddtrace.llmobs._utils import load_data_value
 from ddtrace.llmobs._utils import safe_json
+from ddtrace.llmobs.utils import ToolCall
+from ddtrace.llmobs.utils import ToolResult
 
 
 try:
@@ -232,23 +234,25 @@ def get_messages_from_converse_content(role: str, content: List[Dict[str, Any]])
     """
     if not content or not isinstance(content, list) or not isinstance(content[0], dict):
         return []
-    messages: List[Dict[str, Union[str, List[Dict[str, Any]]]]] = []
+    messages: List[Dict[str, Union[str, List[Dict[str, Any]], List[ToolCall], List[ToolResult]]]] = []
     content_blocks = []
     tool_calls_info = []
     tool_messages: List[Dict[str, Any]] = []
-    unsupported_content_messages: List[Dict[str, Union[str, List[Dict[str, Any]]]]] = []
+    unsupported_content_messages: List[
+        Dict[str, Union[str, List[Dict[str, Any]], List[ToolCall], List[ToolResult]]]
+    ] = []
     for content_block in content:
         if content_block.get("text") and isinstance(content_block.get("text"), str):
             content_blocks.append(content_block.get("text", ""))
         elif content_block.get("toolUse") and isinstance(content_block.get("toolUse"), dict):
             toolUse = content_block.get("toolUse", {})
-            tool_calls_info.append(
-                {
-                    "name": str(toolUse.get("name", "")),
-                    "arguments": toolUse.get("input", {}),
-                    "tool_id": str(toolUse.get("toolUseId", "")),
-                }
+            tool_call_info = ToolCall(
+                name=str(toolUse.get("name", "")),
+                arguments=toolUse.get("input", {}),
+                tool_id=str(toolUse.get("toolUseId", "")),
+                type="toolUse",
             )
+            tool_calls_info.append(tool_call_info)
         elif content_block.get("toolResult") and isinstance(content_block.get("toolResult"), dict):
             tool_message: Dict[str, Any] = content_block.get("toolResult", {})
             tool_message_contents: List[Dict[str, Any]] = tool_message.get("content", [])
@@ -258,21 +262,25 @@ def get_messages_from_converse_content(role: str, content: List[Dict[str, Any]])
                 tool_message_content_text: Optional[str] = tool_message_content.get("text")
                 tool_message_content_json: Optional[Dict[str, Any]] = tool_message_content.get("json")
 
+                tool_result_info = ToolResult(
+                    result=tool_message_content_text
+                    or (tool_message_content_json and safe_json(tool_message_content_json))
+                    or f"[Unsupported content type(s): {','.join(tool_message_content.keys())}]",
+                    tool_id=tool_message_id,
+                    type="toolResult",
+                )
                 tool_messages.append(
                     {
-                        "content": tool_message_content_text
-                        or (tool_message_content_json and safe_json(tool_message_content_json))
-                        or f"[Unsupported content type(s): {','.join(tool_message_content.keys())}]",
-                        "role": "tool",
-                        "tool_id": tool_message_id,
+                        "tool_results": [tool_result_info],
+                        "role": "user",
                     }
                 )
         else:
             content_type = ",".join(content_block.keys())
             unsupported_content_messages.append(
                 {"content": "[Unsupported content type: {}]".format(content_type), "role": role}
             )
-    message = {}  # type: dict[str, Union[str, list[dict[str, dict]]]]
+    message: Dict[str, Union[str, List[Dict[str, Any]], List[ToolCall], List[ToolResult]]] = {}
     if tool_calls_info:
         message.update({"tool_calls": tool_calls_info})
     if content_blocks:
@@ -1005,9 +1013,9 @@ def llmobs_output_messages(self) -> Tuple[List[Dict[str, Any]], List[Tuple[str,
                         "tool_calls": [
                             {
                                 "tool_id": item.call_id,
-                                "arguments": json.loads(item.arguments)
-                                if isinstance(item.arguments, str)
-                                else item.arguments,
+                                "arguments": (
+                                    json.loads(item.arguments) if isinstance(item.arguments, str) else item.arguments
+                                ),
                                 "name": getattr(item, "name", ""),
                                 "type": getattr(item, "type", "function"),
                             }
@@ -1119,19 +1127,20 @@ def get_final_message_converse_stream_message(
         tool_block = tool_blocks.get(idx)
         if not tool_block:
             continue
-        tool_call = {
-            "name": tool_block.get("name", ""),
-            "tool_id": tool_block.get("toolUseId", ""),
-        }
         tool_input = tool_block.get("input")
+        tool_args = {}
         if tool_input is not None:
-            tool_args = {}
             try:
                 tool_args = json.loads(tool_input)
             except (json.JSONDecodeError, ValueError):
                 tool_args = {"input": tool_input}
-            tool_call.update({"arguments": tool_args} if tool_args else {})
-        tool_calls.append(tool_call)
+        tool_call_info = ToolCall(
+            name=tool_block.get("name", ""),
+            tool_id=tool_block.get("toolUseId", ""),
+            arguments=tool_args if tool_args else {},
+            type="toolUse",
+        )
+        tool_calls.append(tool_call_info)
 
     if tool_calls:
         message_output["tool_calls"] = tool_calls
diff --git a/releasenotes/notes/bedrock_tool_usage-de330efb849d7449.yaml b/releasenotes/notes/bedrock_tool_usage-de330efb849d7449.yaml
@@ -0,0 +1,3 @@
+features:
+  - |
+    LLM Observability: Adds support for collecting tool definitions, tool calls and tool results in the Amazon Bedrock integration.
diff --git a/tests/contrib/botocore/bedrock_utils.py b/tests/contrib/botocore/bedrock_utils.py
@@ -25,6 +25,19 @@
     "luxury 4/5 star resorts)"
 )
 
+FETCH_CONCEPT_TOOL_DEFINITION = {
+    "name": "fetch_concept",
+    "description": "Fetch an expert explanation for a concept",
+    "schema": {
+        "json": {
+            "type": "object",
+            "properties": {"concept": {"type": "string", "description": "The concept to explain"}},
+            "required": ["concept"],
+        },
+    },
+}
+
+
 bedrock_converse_args_with_system_and_tool = {
     "system": "You are an expert swe that is to use the tool fetch_concept",
     "user_message": "Explain the concept of distributed tracing in a simple way",
diff --git a/tests/contrib/botocore/test_bedrock_llmobs.py b/tests/contrib/botocore/test_bedrock_llmobs.py
@@ -10,6 +10,7 @@
 from tests.contrib.botocore.bedrock_utils import _MODELS
 from tests.contrib.botocore.bedrock_utils import _REQUEST_BODIES
 from tests.contrib.botocore.bedrock_utils import BOTO_VERSION
+from tests.contrib.botocore.bedrock_utils import FETCH_CONCEPT_TOOL_DEFINITION
 from tests.contrib.botocore.bedrock_utils import bedrock_converse_args_with_system_and_tool
 from tests.contrib.botocore.bedrock_utils import create_bedrock_converse_request
 from tests.contrib.botocore.bedrock_utils import get_mock_response_data
@@ -268,6 +269,7 @@ def test_llmobs_converse(cls, bedrock_client, request_vcr, mock_tracer, llmobs_e
                             "arguments": {"concept": "distributed tracing"},
                             "name": "fetch_concept",
                             "tool_id": mock.ANY,
+                            "type": "toolUse",
                         }
                     ],
                 }
@@ -282,6 +284,7 @@ def test_llmobs_converse(cls, bedrock_client, request_vcr, mock_tracer, llmobs_e
                 "output_tokens": response["usage"]["outputTokens"],
                 "total_tokens": response["usage"]["totalTokens"],
             },
+            tool_definitions=[FETCH_CONCEPT_TOOL_DEFINITION],
             tags={"service": "aws.bedrock-runtime", "ml_app": "<ml-app-name>"},
         )
 
@@ -346,6 +349,7 @@ def test_llmobs_converse_stream(cls, bedrock_client, request_vcr, mock_tracer, l
                             "arguments": {"concept": "distributed tracing"},
                             "name": "fetch_concept",
                             "tool_id": mock.ANY,
+                            "type": "toolUse",
                         }
                     ],
                 }
@@ -359,6 +363,7 @@ def test_llmobs_converse_stream(cls, bedrock_client, request_vcr, mock_tracer, l
                 "output_tokens": 64,
                 "total_tokens": 323,
             },
+            tool_definitions=[FETCH_CONCEPT_TOOL_DEFINITION],
             tags={"service": "aws.bedrock-runtime", "ml_app": "<ml-app-name>"},
         )
 
@@ -398,6 +403,7 @@ def test_llmobs_converse_modified_stream(cls, bedrock_client, request_vcr, mock_
                             "arguments": {"concept": "distributed tracing"},
                             "name": "fetch_concept",
                             "tool_id": mock.ANY,
+                            "type": "toolUse",
                         }
                     ],
                 }
@@ -411,6 +417,7 @@ def test_llmobs_converse_modified_stream(cls, bedrock_client, request_vcr, mock_
                 "output_tokens": 64,
                 "total_tokens": 323,
             },
+            tool_definitions=[FETCH_CONCEPT_TOOL_DEFINITION],
             tags={"service": "aws.bedrock-runtime", "ml_app": "<ml-app-name>"},
         )
 
@@ -581,7 +588,9 @@ def test_llmobs_converse_tool_result_text(self, bedrock_client, request_vcr, moc
             )
 
         assert len(llmobs_events) == 1
-        assert llmobs_events[0]["meta"]["input"]["messages"] == [{"content": "bar", "role": "tool", "tool_id": "foo"}]
+        assert llmobs_events[0]["meta"]["input"]["messages"] == [
+            {"tool_results": [{"result": "bar", "tool_id": "foo", "type": "toolResult"}], "role": "user"}
+        ]
 
     @pytest.mark.skipif(BOTO_VERSION < (1, 34, 131), reason="Converse API not available until botocore 1.34.131")
     def test_llmobs_converse_tool_result_json(self, bedrock_client, request_vcr, mock_tracer, llmobs_events):
@@ -601,7 +610,7 @@ def test_llmobs_converse_tool_result_json(self, bedrock_client, request_vcr, moc
 
         assert len(llmobs_events) == 1
         assert llmobs_events[0]["meta"]["input"]["messages"] == [
-            {"content": '{"result": "bar"}', "role": "tool", "tool_id": "foo"}
+            {"tool_results": [{"result": '{"result": "bar"}', "tool_id": "foo", "type": "toolResult"}], "role": "user"}
         ]
 
     @pytest.mark.skipif(BOTO_VERSION < (1, 34, 131), reason="Converse API not available until botocore 1.34.131")
@@ -638,7 +647,12 @@ def test_llmobs_converse_tool_result_json_non_text_or_json(
 
         assert len(llmobs_events) == 1
         assert llmobs_events[0]["meta"]["input"]["messages"] == [
-            {"content": "[Unsupported content type(s): image]", "role": "tool", "tool_id": "foo"}
+            {
+                "tool_results": [
+                    {"result": "[Unsupported content type(s): image]", "tool_id": "foo", "type": "toolResult"}
+                ],
+                "role": "user",
+            }
         ]
 
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -0,0 +1,3 @@` |
| | `1` | `+features:` |
| | `2` | `+  - \|` |
| | `3` | `+    LLM Observability: Adds support for collecting tool definitions, tool calls and tool results in the Amazon Bedrock integration.` |