awslabs · RyanFrench · Dec 15, 2025 · Dec 15, 2025 · Dec 18, 2025 · Dec 18, 2025
diff --git a/src/generative_ai_toolkit/metrics/modules/conciseness.py b/src/generative_ai_toolkit/metrics/modules/conciseness.py
@@ -68,7 +68,7 @@ def evaluate_conversation(self, conversation_traces, **kwargs):
                                 Example output:
                                 {{ "score": 9, "reasoning": "The agent's responses are concise, and it does not provide superfluous examples or useless encouragements."}}
 
-                                Only return the valid JSON object.
+                                Only return the valid JSON object. Do not wrap it in markdown code blocks or any other formatting.
                                 """
                             )
                             .format(conversation=json.dumps(user_conversation))

diff --git a/src/generative_ai_toolkit/metrics/modules/conversation.py b/src/generative_ai_toolkit/metrics/modules/conversation.py
@@ -83,7 +83,7 @@ def evaluate_conversation(self, conversation_traces, **kwargs):
                                 Example output:
                                 {{ "score": 9, "reasoning": "The agent succeeded in helping the user as expected"}}
 
-                                Only return the JSON object.
+                                Only return the JSON object. Do not wrap it in markdown code blocks or any other formatting.
                                 """
                             )
                             .format(

diff --git a/src/generative_ai_toolkit/metrics/modules/latency.py b/src/generative_ai_toolkit/metrics/modules/latency.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from generative_ai_toolkit.metrics import BaseMetric, Measurement, Unit
-from generative_ai_toolkit.utils.logging import logger
 
 
 class LatencyMetric(BaseMetric):
@@ -30,20 +29,19 @@ def evaluate_trace(self, trace, **kwargs):
 
         dimensions = []
         trace_type = trace.attributes.get("ai.trace.type")
-        if trace_type == "tool-invocation":
-            dimensions.append({"ToolName": trace.attributes["ai.tool.name"]})
-        elif trace_type == "llm-invocation":
-            dimensions.append(
-                {"ModelName": trace.attributes["ai.llm.request.model.id"]}
-            )
-        elif trace_type == "conversation-history-list":
-            dimensions.append({"ConversationHistory": "list-messages"})
-        elif trace_type == "conversation-history-add":
-            dimensions.append({"ConversationHistory": "add-message"})
-        elif trace_type in {"converse", "converse-stream"}:
-            dimensions.append({"Converse": trace_type})
-        else:
-            logger.warn("Unknown trace type", trace_type=trace_type)
+        match trace_type:
+            case "tool-invocation":
+                dimensions.append({"ToolName": trace.attributes["ai.tool.name"]})
+            case "llm-invocation":
+                dimensions.append(
+                    {"ModelName": trace.attributes["ai.llm.request.model.id"]}
+                )
+            case "conversation-history-list":
+                dimensions.append({"ConversationHistory": "list-messages"})
+            case "conversation-history-add":
+                dimensions.append({"ConversationHistory": "add-message"})
+            case "converse" | "converse-stream":
+                dimensions.append({"Converse": trace_type})
 
         return Measurement(
             name="Latency",

diff --git a/src/generative_ai_toolkit/utils/llm_response.py b/src/generative_ai_toolkit/utils/llm_response.py
@@ -30,6 +30,22 @@ def get_text(response: "ConverseResponseTypeDef"):
 
 def json_parse(response: "ConverseResponseTypeDef"):
     text = get_text(response).strip()
+
+    # Handle markdown code blocks
+    if text.startswith("```json"):
+        # Find the closing ``` and extract content between
+        end_marker = text.rfind("```")
+        if end_marker > 7:  # Make sure we found a closing marker after ```json
+            text = text[7:end_marker].strip()  # Remove ```json and closing ```
+    elif text.startswith("```") and text.count("```") >= 2:
+        # Handle generic code blocks that might contain JSON
+        first_newline = text.find('\n')
+        if first_newline != -1:
+            # Skip the opening ``` line
+            end_marker = text.rfind("```")
+            if end_marker > first_newline:
+                text = text[first_newline+1:end_marker].strip()
+
     try:
         return json.loads(text.replace("\n", " "))
     except json.decoder.JSONDecodeError as e: