Commit bf45719
Python: Update gen_ai traces and logs (#10173)
### Motivation and Context

The gen_ai semantic conventions have gone through several updates since we started generating telemetry for gen_ai operations. We would like to align with the latest conventions and, most importantly, allow our users to visualize gen_ai traces in the Azure AI Foundry Tracing UI, which relies on the gen_ai conventions.

### Description

1. Updates the gen_ai telemetry module to align with the latest gen_ai conventions so that all AI connectors generate telemetry data that can be visualized on Azure AI Foundry.
2. Adds unit tests for the update.

> Note that this is a breaking change, as this feature is still experimental. Anyone relying on the previous gen_ai conventions will also need to update.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
1 parent bc3e294 commit bf45719
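For reviewers who want to eyeball the new telemetry locally, here is a minimal sketch (not part of this change) that wires console exporters for both spans and logs. The two `SEMANTICKERNEL_*` environment variable names are assumptions based on the existing model diagnostics settings; double-check them against the telemetry sample before relying on them.

```python
# Hedged sketch: wire up OpenTelemetry so the gen_ai spans and the new log-based
# events emitted by the decorators in this PR show up on the console.
import logging
import os

from opentelemetry import trace
from opentelemetry._logs import set_logger_provider
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import ConsoleLogExporter, SimpleLogRecordProcessor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Assumed switches for model diagnostics (names not confirmed by this diff).
os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS"] = "true"
os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE"] = "true"

# Spans: the decorators call get_tracer(__name__), so a global tracer provider suffices.
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(tracer_provider)

# Logs: chat messages and choices are now emitted through the stdlib logging module,
# so route logging records into an OpenTelemetry LoggerProvider.
logger_provider = LoggerProvider()
logger_provider.add_log_record_processor(SimpleLogRecordProcessor(ConsoleLogExporter()))
set_logger_provider(logger_provider)
logging.getLogger().addHandler(LoggingHandler(logger_provider=logger_provider))
```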

File tree: 6 files changed, +271 -91 lines changed


python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py

Lines changed: 110 additions & 50 deletions
@@ -2,9 +2,10 @@

 import functools
 import json
+import logging
 from collections.abc import AsyncGenerator, Callable
 from functools import reduce
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, ClassVar

 from opentelemetry.trace import Span, StatusCode, get_tracer, use_span

@@ -37,9 +38,36 @@
 TEXT_COMPLETION_OPERATION = "text.completions"
 TEXT_STREAMING_COMPLETION_OPERATION = "text.streaming_completions"

+
+# We're recording multiple events for the chat history, some of them are emitted within (hundreds of)
+# nanoseconds of each other. The default timestamp resolution is not high enough to guarantee unique
+# timestamps for each message. Also Azure Monitor truncates resolution to microseconds and some other
+# backends truncate to milliseconds.
+#
+# But we need to give users a way to restore chat message order, so we're incrementing the timestamp
+# by 1 microsecond for each message.
+#
+# This is a workaround, we'll find a generic and better solution - see
+# https://github.com/open-telemetry/semantic-conventions/issues/1701
+class ChatHistoryMessageTimestampFilter(logging.Filter):
+    """A filter to increment the timestamp of INFO logs by 1 microsecond."""
+
+    INDEX_KEY: ClassVar[str] = "CHAT_MESSAGE_INDEX"
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Increment the timestamp of INFO logs by 1 microsecond."""
+        if hasattr(record, self.INDEX_KEY):
+            idx = getattr(record, self.INDEX_KEY)
+            record.created += idx * 1e-6
+        return True
+
+
 # Creates a tracer from the global tracer provider
 tracer = get_tracer(__name__)

+logger = logging.getLogger(__name__)
+logger.addFilter(ChatHistoryMessageTimestampFilter())
+

 @experimental_function
 def are_model_diagnostics_enabled() -> bool:
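As a standalone illustration of the workaround described in the comment above (identifiers here are illustrative, not the ones added by this PR), a filter like this makes records that would otherwise share a timestamp sortable again:

```python
# Records carrying a CHAT_MESSAGE_INDEX in `extra` get their creation time bumped by
# index microseconds, so backends that truncate to microsecond resolution still
# preserve message order.
import logging


class IndexTimestampFilter(logging.Filter):
    INDEX_KEY = "CHAT_MESSAGE_INDEX"

    def filter(self, record: logging.LogRecord) -> bool:
        if hasattr(record, self.INDEX_KEY):
            record.created += getattr(record, self.INDEX_KEY) * 1e-6
        return True


logger = logging.getLogger("demo")
logger.setLevel(logging.INFO)
logger.addFilter(IndexTimestampFilter())

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(created).6f %(message)s"))
logger.addHandler(handler)

# These records are created back to back, but their printed timestamps differ by
# exactly 1 microsecond per message index.
for idx, msg in enumerate(["system message", "user message", "assistant message"]):
    logger.info(msg, extra={IndexTimestampFilter.INDEX_KEY: idx})
```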
@@ -87,19 +115,19 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageConten
             settings: "PromptExecutionSettings" = kwargs.get("settings") or args[2]  # type: ignore

             with use_span(
-                _start_completion_activity(
+                _get_completion_span(
                     CHAT_COMPLETION_OPERATION,
                     completion_service.ai_model_id,
                     model_provider,
                     completion_service.service_url(),
-                    chat_history,
                     settings,
                 ),
                 end_on_exit=True,
             ) as current_span:
+                _set_completion_input(model_provider, chat_history)
                 try:
                     completions: list[ChatMessageContent] = await completion_func(*args, **kwargs)
-                    _set_completion_response(current_span, completions)
+                    _set_completion_response(current_span, completions, model_provider)
                     return completions
                 except Exception as exception:
                     _set_completion_error(current_span, exception)
@@ -144,16 +172,16 @@ async def wrapper_decorator(
             all_messages: dict[int, list[StreamingChatMessageContent]] = {}

             with use_span(
-                _start_completion_activity(
+                _get_completion_span(
                     CHAT_STREAMING_COMPLETION_OPERATION,
                     completion_service.ai_model_id,
                     model_provider,
                     completion_service.service_url(),
-                    chat_history,
                     settings,
                 ),
                 end_on_exit=True,
             ) as current_span:
+                _set_completion_input(model_provider, chat_history)
                 try:
                     async for streaming_chat_message_contents in completion_func(*args, **kwargs):
                         for streaming_chat_message_content in streaming_chat_message_contents:
@@ -166,7 +194,7 @@
                     all_messages_flattened = [
                         reduce(lambda x, y: x + y, messages) for messages in all_messages.values()
                     ]
-                    _set_completion_response(current_span, all_messages_flattened)
+                    _set_completion_response(current_span, all_messages_flattened, model_provider)
                 except Exception as exception:
                     _set_completion_error(current_span, exception)
                     raise
@@ -203,19 +231,19 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]:
             settings: "PromptExecutionSettings" = kwargs["settings"] if kwargs.get("settings") is not None else args[2]

             with use_span(
-                _start_completion_activity(
+                _get_completion_span(
                     TEXT_COMPLETION_OPERATION,
                     completion_service.ai_model_id,
                     model_provider,
                     completion_service.service_url(),
-                    prompt,
                     settings,
                 ),
                 end_on_exit=True,
             ) as current_span:
+                _set_completion_input(model_provider, prompt)
                 try:
                     completions: list[TextContent] = await completion_func(*args, **kwargs)
-                    _set_completion_response(current_span, completions)
+                    _set_completion_response(current_span, completions, model_provider)
                     return completions
                 except Exception as exception:
                     _set_completion_error(current_span, exception)
@@ -258,16 +286,16 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S
             all_text_contents: dict[int, list["StreamingTextContent"]] = {}

             with use_span(
-                _start_completion_activity(
+                _get_completion_span(
                     TEXT_STREAMING_COMPLETION_OPERATION,
                     completion_service.ai_model_id,
                     model_provider,
                     completion_service.service_url(),
-                    prompt,
                     settings,
                 ),
                 end_on_exit=True,
             ) as current_span:
+                _set_completion_input(model_provider, prompt)
                 try:
                     async for streaming_text_contents in completion_func(*args, **kwargs):
                         for streaming_text_content in streaming_text_contents:
@@ -280,7 +308,7 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S
                     all_text_contents_flattened = [
                         reduce(lambda x, y: x + y, messages) for messages in all_text_contents.values()
                     ]
-                    _set_completion_response(current_span, all_text_contents_flattened)
+                    _set_completion_response(current_span, all_text_contents_flattened, model_provider)
                 except Exception as exception:
                     _set_completion_error(current_span, exception)
                     raise
@@ -292,15 +320,18 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S
     return inner_trace_streaming_text_completion


-def _start_completion_activity(
+def _get_completion_span(
     operation_name: str,
     model_name: str,
     model_provider: str,
     service_url: str | None,
-    prompt: str | ChatHistory,
     execution_settings: "PromptExecutionSettings | None",
 ) -> Span:
-    """Start a text or chat completion activity for a given model."""
+    """Start a text or chat completion span for a given model.
+
+    Note that `start_span` doesn't make the span the current span.
+    Use `use_span` to make it the current span as a context manager.
+    """
     span = tracer.start_span(f"{operation_name} {model_name}")

     # Set attributes on the span
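The new docstring's point about `start_span` versus `use_span`, in plain OpenTelemetry terms (a generic sketch, not SK-specific):

```python
from opentelemetry.trace import get_tracer, use_span

tracer = get_tracer(__name__)

# start_span() only creates the span; it does not make it the current span.
span = tracer.start_span("chat.completions my-model")

# use_span() makes it current for the enclosed block (and ends it on exit here),
# so child spans and correlated logs recorded inside attach to it.
with use_span(span, end_on_exit=True):
    with tracer.start_as_current_span("child-operation"):
        pass
```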
@@ -316,24 +347,53 @@ def _start_completion_activity(
     # TODO(@glahaye): we'll need to have a way to get these attributes from model
     # providers other than OpenAI (for example if the attributes are named differently)
     if execution_settings:
-        attribute = execution_settings.extension_data.get("max_tokens")
-        if attribute:
-            span.set_attribute(gen_ai_attributes.MAX_TOKENS, attribute)
+        attribute_name_map = {
+            "seed": gen_ai_attributes.SEED,
+            "encoding_formats": gen_ai_attributes.ENCODING_FORMATS,
+            "frequency_penalty": gen_ai_attributes.FREQUENCY_PENALTY,
+            "max_tokens": gen_ai_attributes.MAX_TOKENS,
+            "stop_sequences": gen_ai_attributes.STOP_SEQUENCES,
+            "temperature": gen_ai_attributes.TEMPERATURE,
+            "top_k": gen_ai_attributes.TOP_K,
+            "top_p": gen_ai_attributes.TOP_P,
+        }
+        for attribute_name, attribute_key in attribute_name_map.items():
+            attribute = execution_settings.extension_data.get(attribute_name)
+            if attribute:
+                span.set_attribute(attribute_key, attribute)
+
+    return span

-        attribute = execution_settings.extension_data.get("temperature")
-        if attribute:
-            span.set_attribute(gen_ai_attributes.TEMPERATURE, attribute)

-        attribute = execution_settings.extension_data.get("top_p")
-        if attribute:
-            span.set_attribute(gen_ai_attributes.TOP_P, attribute)
+def _set_completion_input(
+    model_provider: str,
+    prompt: str | ChatHistory,
+) -> None:
+    """Set the input for a text or chat completion.

+    The logs will be associated to the current span.
+    """
     if are_sensitive_events_enabled():
         if isinstance(prompt, ChatHistory):
-            prompt = _messages_to_openai_format(prompt.messages)
-        span.add_event(gen_ai_attributes.PROMPT_EVENT, {gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt})
-
-    return span
+            for idx, message in enumerate(prompt.messages):
+                event_name = gen_ai_attributes.ROLE_EVENT_MAP.get(message.role)
+                if event_name:
+                    logger.info(
+                        json.dumps(message.to_dict()),
+                        extra={
+                            gen_ai_attributes.EVENT_NAME: event_name,
+                            gen_ai_attributes.SYSTEM: model_provider,
+                            ChatHistoryMessageTimestampFilter.INDEX_KEY: idx,
+                        },
+                    )
+        else:
+            logger.info(
+                prompt,
+                extra={
+                    gen_ai_attributes.EVENT_NAME: gen_ai_attributes.PROMPT,
+                    gen_ai_attributes.SYSTEM: model_provider,
+                },
+            )


 def _set_completion_response(
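For reference, the per-message events emitted by `_set_completion_input` reduce to plain `logging` calls shaped roughly like this sketch (roles, attribute values, and the provider name are illustrative):

```python
import json
import logging

logger = logging.getLogger(__name__)

chat_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What's the weather today?"},
]

# Mirrors ROLE_EVENT_MAP: each role gets its own gen_ai event name.
role_event_map = {"system": "gen_ai.system.message", "user": "gen_ai.user.message"}

for idx, message in enumerate(chat_messages):
    logger.info(
        json.dumps(message),  # event body: the serialized message
        extra={
            "event.name": role_event_map[message["role"]],
            "gen_ai.system": "openai",  # model_provider, illustrative
            "CHAT_MESSAGE_INDEX": idx,  # consumed by the timestamp filter
        },
    )
```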
@@ -342,8 +402,9 @@ def _set_completion_response(
     | list[TextContent]
     | list[StreamingChatMessageContent]
     | list[StreamingTextContent],
+    model_provider: str,
 ) -> None:
-    """Set the a text or chat completion response for a given activity."""
+    """Set the a text or chat completion response for a given span."""
     first_completion = completions[0]

     # Set the response ID
@@ -362,33 +423,32 @@
     usage = first_completion.metadata.get("usage", None)
     if isinstance(usage, CompletionUsage):
         if usage.prompt_tokens:
-            current_span.set_attribute(gen_ai_attributes.PROMPT_TOKENS, usage.prompt_tokens)
+            current_span.set_attribute(gen_ai_attributes.INPUT_TOKENS, usage.prompt_tokens)
         if usage.completion_tokens:
-            current_span.set_attribute(gen_ai_attributes.COMPLETION_TOKENS, usage.completion_tokens)
+            current_span.set_attribute(gen_ai_attributes.OUTPUT_TOKENS, usage.completion_tokens)

     # Set the completion event
     if are_sensitive_events_enabled():
-        completion_text: str = _messages_to_openai_format(completions)
-        current_span.add_event(
-            gen_ai_attributes.COMPLETION_EVENT, {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: completion_text}
-        )
+        for completion in completions:
+            full_response: dict[str, Any] = {
+                "message": completion.to_dict(),
+            }
+
+            if hasattr(completion, "finish_reason"):
+                full_response["finish_reason"] = completion.finish_reason
+            if hasattr(completion, "choice_index"):
+                full_response["index"] = completion.choice_index
+
+            logger.info(
+                json.dumps(full_response),
+                extra={
+                    gen_ai_attributes.EVENT_NAME: gen_ai_attributes.CHOICE,
+                    gen_ai_attributes.SYSTEM: model_provider,
+                },
+            )


 def _set_completion_error(span: Span, error: Exception) -> None:
     """Set an error for a text or chat completion ."""
     span.set_attribute(gen_ai_attributes.ERROR_TYPE, str(type(error)))
     span.set_status(StatusCode.ERROR, repr(error))
-
-
-def _messages_to_openai_format(
-    messages: list[ChatMessageContent]
-    | list[StreamingChatMessageContent]
-    | list[TextContent]
-    | list[StreamingTextContent],
-) -> str:
-    """Convert a list of ChatMessageContent to a string in the OpenAI format.
-
-    OpenTelemetry recommends formatting the messages in the OpenAI format
-    regardless of the actual model being used.
-    """
-    return json.dumps([message.to_dict() for message in messages])
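Similarly, the `gen_ai.choice` event written by `_set_completion_response` for each completion looks roughly like this sketch (values are illustrative):

```python
import json
import logging

logger = logging.getLogger(__name__)

full_response = {
    "message": {"role": "assistant", "content": "It's sunny today."},
    "finish_reason": "stop",  # included when the completion reports one
    "index": 0,               # choice_index, mainly relevant for streaming content
}

logger.info(
    json.dumps(full_response),
    extra={
        "event.name": "gen_ai.choice",
        "gen_ai.system": "openai",  # model_provider, illustrative
    },
)
# Token usage now lands on the span as gen_ai.usage.input_tokens / gen_ai.usage.output_tokens.
```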
Lines changed: 29 additions & 13 deletions
@@ -1,32 +1,48 @@
 # Copyright (c) Microsoft. All rights reserved.

+from semantic_kernel.contents.utils.author_role import AuthorRole
+
 # Constants for tracing activities with semantic conventions.
 # Ideally, we should use the attributes from the semcov package.
 # However, many of the attributes are not yet available in the package,
 # so we define them here for now.

 # Activity tags
-SYSTEM = "gen_ai.system"
 OPERATION = "gen_ai.operation.name"
+SYSTEM = "gen_ai.system"
+ERROR_TYPE = "error.type"
 MODEL = "gen_ai.request.model"
-MAX_TOKENS = "gen_ai.request.max_tokens"  # nosec
+SEED = "gen_ai.request.seed"
+PORT = "server.port"
+ENCODING_FORMATS = "gen_ai.request.encoding_formats"
+FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty"
+MAX_TOKENS = "gen_ai.request.max_tokens"
+STOP_SEQUENCES = "gen_ai.request.stop_sequences"
 TEMPERATURE = "gen_ai.request.temperature"
+TOP_K = "gen_ai.request.top_k"
 TOP_P = "gen_ai.request.top_p"
-RESPONSE_ID = "gen_ai.response.id"
 FINISH_REASON = "gen_ai.response.finish_reason"
-PROMPT_TOKENS = "gen_ai.response.prompt_tokens"  # nosec
-COMPLETION_TOKENS = "gen_ai.response.completion_tokens"  # nosec
+RESPONSE_ID = "gen_ai.response.id"
+INPUT_TOKENS = "gen_ai.usage.input_tokens"
+OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
 ADDRESS = "server.address"
-PORT = "server.port"
-ERROR_TYPE = "error.type"

 # Activity events
-PROMPT_EVENT = "gen_ai.content.prompt"
-COMPLETION_EVENT = "gen_ai.content.completion"
-
-# Activity event attributes
-PROMPT_EVENT_PROMPT = "gen_ai.prompt"
-COMPLETION_EVENT_COMPLETION = "gen_ai.completion"
+EVENT_NAME = "event.name"
+SYSTEM_MESSAGE = "gen_ai.system.message"
+USER_MESSAGE = "gen_ai.user.message"
+ASSISTANT_MESSAGE = "gen_ai.assistant.message"
+TOOL_MESSAGE = "gen_ai.tool.message"
+CHOICE = "gen_ai.choice"
+PROMPT = "gen_ai.prompt"

 # Kernel specific attributes
 AVAILABLE_FUNCTIONS = "sk.available_functions"
+
+
+ROLE_EVENT_MAP = {
+    AuthorRole.SYSTEM: SYSTEM_MESSAGE,
+    AuthorRole.USER: USER_MESSAGE,
+    AuthorRole.ASSISTANT: ASSISTANT_MESSAGE,
+    AuthorRole.TOOL: TOOL_MESSAGE,
+}
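Downstream, the decorators consume these constants roughly as follows; the import path of this constants module is an assumption (the file header above is truncated), inferred from its sibling decorators.py:

```python
from semantic_kernel.contents.utils.author_role import AuthorRole

# Import path assumed: the constants module shown above, next to decorators.py.
from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes

# Map a chat message role to the event name used when logging that message.
event_name = gen_ai_attributes.ROLE_EVENT_MAP.get(AuthorRole.USER)
assert event_name == gen_ai_attributes.USER_MESSAGE  # "gen_ai.user.message"
```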