Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/generative_ai_toolkit/metrics/modules/conciseness.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def evaluate_conversation(self, conversation_traces, **kwargs):
Example output:
{{ "score": 9, "reasoning": "The agent's responses are concise, and it does not provide superfluous examples or useless encouragements."}}

Only return the valid JSON object.
Only return the valid JSON object. Do not wrap it in markdown code blocks or any other formatting.
"""
)
.format(conversation=json.dumps(user_conversation))
Expand Down
2 changes: 1 addition & 1 deletion src/generative_ai_toolkit/metrics/modules/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def evaluate_conversation(self, conversation_traces, **kwargs):
Example output:
{{ "score": 9, "reasoning": "The agent succeeded in helping the user as expected"}}

Only return the JSON object.
Only return the JSON object. Do not wrap it in markdown code blocks or any other formatting.
"""
)
.format(
Expand Down
28 changes: 13 additions & 15 deletions src/generative_ai_toolkit/metrics/modules/latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from generative_ai_toolkit.metrics import BaseMetric, Measurement, Unit
from generative_ai_toolkit.utils.logging import logger


class LatencyMetric(BaseMetric):
Expand All @@ -30,20 +29,19 @@ def evaluate_trace(self, trace, **kwargs):

dimensions = []
trace_type = trace.attributes.get("ai.trace.type")
if trace_type == "tool-invocation":
dimensions.append({"ToolName": trace.attributes["ai.tool.name"]})
elif trace_type == "llm-invocation":
dimensions.append(
{"ModelName": trace.attributes["ai.llm.request.model.id"]}
)
elif trace_type == "conversation-history-list":
dimensions.append({"ConversationHistory": "list-messages"})
elif trace_type == "conversation-history-add":
dimensions.append({"ConversationHistory": "add-message"})
elif trace_type in {"converse", "converse-stream"}:
dimensions.append({"Converse": trace_type})
else:
logger.warn("Unknown trace type", trace_type=trace_type)
match trace_type:
case "tool-invocation":
dimensions.append({"ToolName": trace.attributes["ai.tool.name"]})
case "llm-invocation":
dimensions.append(
{"ModelName": trace.attributes["ai.llm.request.model.id"]}
)
case "conversation-history-list":
dimensions.append({"ConversationHistory": "list-messages"})
case "conversation-history-add":
dimensions.append({"ConversationHistory": "add-message"})
case "converse" | "converse-stream":
dimensions.append({"Converse": trace_type})

return Measurement(
name="Latency",
Expand Down
16 changes: 16 additions & 0 deletions src/generative_ai_toolkit/utils/llm_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,22 @@ def get_text(response: "ConverseResponseTypeDef"):

def json_parse(response: "ConverseResponseTypeDef"):
text = get_text(response).strip()

# Handle markdown code blocks
if text.startswith("```json"):
# Find the closing ``` and extract content between
end_marker = text.rfind("```")
if end_marker > 7: # Make sure we found a closing marker after ```json
text = text[7:end_marker].strip() # Remove ```json and closing ```
elif text.startswith("```") and text.count("```") >= 2:
# Handle generic code blocks that might contain JSON
first_newline = text.find('\n')
if first_newline != -1:
# Skip the opening ``` line
end_marker = text.rfind("```")
if end_marker > first_newline:
text = text[first_newline+1:end_marker].strip()

try:
return json.loads(text.replace("\n", " "))
except json.decoder.JSONDecodeError as e:
Expand Down
Loading