
import functools
import json
+ import logging
from collections.abc import AsyncGenerator, Callable
from functools import reduce
- from typing import TYPE_CHECKING, Any
+ from typing import TYPE_CHECKING, Any, ClassVar

from opentelemetry.trace import Span, StatusCode, get_tracer, use_span

TEXT_COMPLETION_OPERATION = "text.completions"
TEXT_STREAMING_COMPLETION_OPERATION = "text.streaming_completions"

+
+ # We're recording multiple events for the chat history, some of which are emitted within (hundreds of)
+ # nanoseconds of each other. The default timestamp resolution is not high enough to guarantee unique
+ # timestamps for each message. Also, Azure Monitor truncates the resolution to microseconds, and some
+ # other backends truncate to milliseconds.
+ #
+ # But we need to give users a way to restore chat message order, so we're incrementing the timestamp
+ # by 1 microsecond for each message.
+ #
+ # This is a workaround; we'll find a generic and better solution - see
+ # https://github.com/open-telemetry/semantic-conventions/issues/1701
+ class ChatHistoryMessageTimestampFilter(logging.Filter):
+     """A filter to increment the timestamp of INFO logs by 1 microsecond."""
+
+     INDEX_KEY: ClassVar[str] = "CHAT_MESSAGE_INDEX"
+
+     def filter(self, record: logging.LogRecord) -> bool:
+         """Increment the timestamp of INFO logs by 1 microsecond."""
+         if hasattr(record, self.INDEX_KEY):
+             idx = getattr(record, self.INDEX_KEY)
+             record.created += idx * 1e-6
+         return True
+
+
# Creates a tracer from the global tracer provider
tracer = get_tracer(__name__)

+ logger = logging.getLogger(__name__)
+ logger.addFilter(ChatHistoryMessageTimestampFilter())
+

@experimental_function
def are_model_diagnostics_enabled() -> bool:
@@ -87,19 +115,19 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageConten
            settings: "PromptExecutionSettings" = kwargs.get("settings") or args[2]  # type: ignore

            with use_span(
-                 _start_completion_activity(
+                 _get_completion_span(
                    CHAT_COMPLETION_OPERATION,
                    completion_service.ai_model_id,
                    model_provider,
                    completion_service.service_url(),
-                     chat_history,
                    settings,
                ),
                end_on_exit=True,
            ) as current_span:
+                 _set_completion_input(model_provider, chat_history)
                try:
                    completions: list[ChatMessageContent] = await completion_func(*args, **kwargs)
-                     _set_completion_response(current_span, completions)
+                     _set_completion_response(current_span, completions, model_provider)
                    return completions
                except Exception as exception:
                    _set_completion_error(current_span, exception)
@@ -144,16 +172,16 @@ async def wrapper_decorator(
            all_messages: dict[int, list[StreamingChatMessageContent]] = {}

            with use_span(
-                 _start_completion_activity(
+                 _get_completion_span(
                    CHAT_STREAMING_COMPLETION_OPERATION,
                    completion_service.ai_model_id,
                    model_provider,
                    completion_service.service_url(),
-                     chat_history,
                    settings,
                ),
                end_on_exit=True,
            ) as current_span:
+                 _set_completion_input(model_provider, chat_history)
                try:
                    async for streaming_chat_message_contents in completion_func(*args, **kwargs):
                        for streaming_chat_message_content in streaming_chat_message_contents:
@@ -166,7 +194,7 @@ async def wrapper_decorator(
                    all_messages_flattened = [
                        reduce(lambda x, y: x + y, messages) for messages in all_messages.values()
                    ]
-                     _set_completion_response(current_span, all_messages_flattened)
+                     _set_completion_response(current_span, all_messages_flattened, model_provider)
                except Exception as exception:
                    _set_completion_error(current_span, exception)
                    raise
@@ -203,19 +231,19 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]:
            settings: "PromptExecutionSettings" = kwargs["settings"] if kwargs.get("settings") is not None else args[2]

            with use_span(
-                 _start_completion_activity(
+                 _get_completion_span(
                    TEXT_COMPLETION_OPERATION,
                    completion_service.ai_model_id,
                    model_provider,
                    completion_service.service_url(),
-                     prompt,
                    settings,
                ),
                end_on_exit=True,
            ) as current_span:
+                 _set_completion_input(model_provider, prompt)
                try:
                    completions: list[TextContent] = await completion_func(*args, **kwargs)
-                     _set_completion_response(current_span, completions)
+                     _set_completion_response(current_span, completions, model_provider)
                    return completions
                except Exception as exception:
                    _set_completion_error(current_span, exception)
@@ -258,16 +286,16 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S
            all_text_contents: dict[int, list["StreamingTextContent"]] = {}

            with use_span(
-                 _start_completion_activity(
+                 _get_completion_span(
                    TEXT_STREAMING_COMPLETION_OPERATION,
                    completion_service.ai_model_id,
                    model_provider,
                    completion_service.service_url(),
-                     prompt,
                    settings,
                ),
                end_on_exit=True,
            ) as current_span:
+                 _set_completion_input(model_provider, prompt)
                try:
                    async for streaming_text_contents in completion_func(*args, **kwargs):
                        for streaming_text_content in streaming_text_contents:
@@ -280,7 +308,7 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S
                    all_text_contents_flattened = [
                        reduce(lambda x, y: x + y, messages) for messages in all_text_contents.values()
                    ]
-                     _set_completion_response(current_span, all_text_contents_flattened)
+                     _set_completion_response(current_span, all_text_contents_flattened, model_provider)
                except Exception as exception:
                    _set_completion_error(current_span, exception)
                    raise
@@ -292,15 +320,18 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S
    return inner_trace_streaming_text_completion


- def _start_completion_activity(
+ def _get_completion_span(
    operation_name: str,
    model_name: str,
    model_provider: str,
    service_url: str | None,
-     prompt: str | ChatHistory,
    execution_settings: "PromptExecutionSettings | None",
) -> Span:
-     """Start a text or chat completion activity for a given model."""
+     """Start a text or chat completion span for a given model.
+
+     Note that `start_span` doesn't make the span the current span.
+     Use `use_span` to make it the current span as a context manager.
+     """
    span = tracer.start_span(f"{operation_name} {model_name}")

    # Set attributes on the span
@@ -316,24 +347,53 @@ def _start_completion_activity(
    # TODO(@glahaye): we'll need to have a way to get these attributes from model
    # providers other than OpenAI (for example if the attributes are named differently)
    if execution_settings:
-         attribute = execution_settings.extension_data.get("max_tokens")
-         if attribute:
-             span.set_attribute(gen_ai_attributes.MAX_TOKENS, attribute)
+         attribute_name_map = {
+             "seed": gen_ai_attributes.SEED,
+             "encoding_formats": gen_ai_attributes.ENCODING_FORMATS,
+             "frequency_penalty": gen_ai_attributes.FREQUENCY_PENALTY,
+             "max_tokens": gen_ai_attributes.MAX_TOKENS,
+             "stop_sequences": gen_ai_attributes.STOP_SEQUENCES,
+             "temperature": gen_ai_attributes.TEMPERATURE,
+             "top_k": gen_ai_attributes.TOP_K,
+             "top_p": gen_ai_attributes.TOP_P,
+         }
+         for attribute_name, attribute_key in attribute_name_map.items():
+             attribute = execution_settings.extension_data.get(attribute_name)
+             if attribute:
+                 span.set_attribute(attribute_key, attribute)
+
+     return span

-         attribute = execution_settings.extension_data.get("temperature")
-         if attribute:
-             span.set_attribute(gen_ai_attributes.TEMPERATURE, attribute)

-         attribute = execution_settings.extension_data.get("top_p")
-         if attribute:
-             span.set_attribute(gen_ai_attributes.TOP_P, attribute)
+ def _set_completion_input(
+     model_provider: str,
+     prompt: str | ChatHistory,
+ ) -> None:
+     """Set the input for a text or chat completion.

+     The logs will be associated with the current span.
+     """
    if are_sensitive_events_enabled():
        if isinstance(prompt, ChatHistory):
-             prompt = _messages_to_openai_format(prompt.messages)
-         span.add_event(gen_ai_attributes.PROMPT_EVENT, {gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt})
-
-     return span
+             for idx, message in enumerate(prompt.messages):
+                 event_name = gen_ai_attributes.ROLE_EVENT_MAP.get(message.role)
+                 if event_name:
+                     logger.info(
+                         json.dumps(message.to_dict()),
+                         extra={
+                             gen_ai_attributes.EVENT_NAME: event_name,
+                             gen_ai_attributes.SYSTEM: model_provider,
+                             ChatHistoryMessageTimestampFilter.INDEX_KEY: idx,
+                         },
+                     )
+         else:
+             logger.info(
+                 prompt,
+                 extra={
+                     gen_ai_attributes.EVENT_NAME: gen_ai_attributes.PROMPT,
+                     gen_ai_attributes.SYSTEM: model_provider,
+                 },
+             )


def _set_completion_response(
@@ -342,8 +402,9 @@ def _set_completion_response(
    | list[TextContent]
    | list[StreamingChatMessageContent]
    | list[StreamingTextContent],
+     model_provider: str,
) -> None:
-     """Set the a text or chat completion response for a given activity."""
+     """Set a text or chat completion response for a given span."""
    first_completion = completions[0]

    # Set the response ID
@@ -362,33 +423,32 @@ def _set_completion_response(
    usage = first_completion.metadata.get("usage", None)
    if isinstance(usage, CompletionUsage):
        if usage.prompt_tokens:
-             current_span.set_attribute(gen_ai_attributes.PROMPT_TOKENS, usage.prompt_tokens)
+             current_span.set_attribute(gen_ai_attributes.INPUT_TOKENS, usage.prompt_tokens)
        if usage.completion_tokens:
-             current_span.set_attribute(gen_ai_attributes.COMPLETION_TOKENS, usage.completion_tokens)
+             current_span.set_attribute(gen_ai_attributes.OUTPUT_TOKENS, usage.completion_tokens)

    # Set the completion event
    if are_sensitive_events_enabled():
-         completion_text: str = _messages_to_openai_format(completions)
-         current_span.add_event(
-             gen_ai_attributes.COMPLETION_EVENT, {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: completion_text}
-         )
+         for completion in completions:
+             full_response: dict[str, Any] = {
+                 "message": completion.to_dict(),
+             }
+
+             if hasattr(completion, "finish_reason"):
+                 full_response["finish_reason"] = completion.finish_reason
+             if hasattr(completion, "choice_index"):
+                 full_response["index"] = completion.choice_index
+
+             logger.info(
+                 json.dumps(full_response),
+                 extra={
+                     gen_ai_attributes.EVENT_NAME: gen_ai_attributes.CHOICE,
+                     gen_ai_attributes.SYSTEM: model_provider,
+                 },
+             )


def _set_completion_error(span: Span, error: Exception) -> None:
    """Set an error for a text or chat completion."""
    span.set_attribute(gen_ai_attributes.ERROR_TYPE, str(type(error)))
    span.set_status(StatusCode.ERROR, repr(error))
-
-
- def _messages_to_openai_format(
-     messages: list[ChatMessageContent]
-     | list[StreamingChatMessageContent]
-     | list[TextContent]
-     | list[StreamingTextContent],
- ) -> str:
-     """Convert a list of ChatMessageContent to a string in the OpenAI format.
-
-     OpenTelemetry recommends formatting the messages in the OpenAI format
-     regardless of the actual model being used.
-     """
-     return json.dumps([message.to_dict() for message in messages])
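For context, a minimal, self-contained sketch (not part of the diff) of how the new ChatHistoryMessageTimestampFilter behaves: it assumes only the standard library, and the "demo" logger, handler, and sample messages are illustrative. Each record carrying a CHAT_MESSAGE_INDEX has its creation timestamp shifted by that many microseconds, so a backend that truncates timestamps to microsecond resolution can still recover the chat message order.

# Standalone sketch of the timestamp workaround introduced in this diff; names are illustrative.
import logging


class ChatHistoryMessageTimestampFilter(logging.Filter):
    """Shift a record's timestamp by its chat message index, in microseconds."""

    INDEX_KEY = "CHAT_MESSAGE_INDEX"

    def filter(self, record: logging.LogRecord) -> bool:
        if hasattr(record, self.INDEX_KEY):
            record.created += getattr(record, self.INDEX_KEY) * 1e-6
        return True


logger = logging.getLogger("demo")
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
# %(created)f prints the (shifted) epoch timestamp with sub-second precision.
handler.setFormatter(logging.Formatter("%(created)f %(message)s"))
logger.addHandler(handler)
logger.addFilter(ChatHistoryMessageTimestampFilter())

# Three "chat messages" logged back to back; each record's timestamp is offset by its index in microseconds.
for idx, text in enumerate(["system prompt", "user question", "assistant reply"]):
    logger.info(text, extra={ChatHistoryMessageTimestampFilter.INDEX_KEY: idx})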