diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py
index 3cf12442149..6aed01be783 100644
--- a/ddtrace/contrib/internal/litellm/patch.py
+++ b/ddtrace/contrib/internal/litellm/patch.py
@@ -3,6 +3,8 @@
 import litellm
 
 from ddtrace import config
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream
 from ddtrace.contrib.trace_utils import unwrap
 from ddtrace.contrib.trace_utils import with_traced_module
 from ddtrace.contrib.trace_utils import wrap
@@ -21,6 +23,25 @@ def get_version() -> str:
 
 @with_traced_module
 def traced_completion(litellm, pin, func, instance, args, kwargs):
+    return _traced_completion(litellm, pin, func, instance, args, kwargs, False)
+
+
+@with_traced_module
+async def traced_acompletion(litellm, pin, func, instance, args, kwargs):
+    return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, False)
+
+
+@with_traced_module
+def traced_text_completion(litellm, pin, func, instance, args, kwargs):
+    return _traced_completion(litellm, pin, func, instance, args, kwargs, True)
+
+
+@with_traced_module
+async def traced_atext_completion(litellm, pin, func, instance, args, kwargs):
+    return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, True)
+
+
+def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion):
     integration = litellm._datadog_integration
     model = get_argument_value(args, kwargs, 0, "model", None)
     host = None
@@ -31,19 +52,29 @@ def traced_completion(litellm, pin, func, instance, args, kwargs):
         func.__name__,
         model=model,
         host=host,
-        submit_to_llmobs=False,
+        submit_to_llmobs=integration.should_submit_to_llmobs(kwargs, model),
     )
+    stream = kwargs.get("stream", False)
+    resp = None
     try:
-        return func(*args, **kwargs)
+        resp = func(*args, **kwargs)
+        if stream:
+            return TracedLiteLLMStream(resp, integration, span, kwargs, is_completion)
+        return resp
     except Exception:
         span.set_exc_info(*sys.exc_info())
         raise
     finally:
-        span.finish()
+        # streamed spans will be finished separately once the stream generator is exhausted
+        if not stream:
+            if integration.is_pc_sampled_llmobs(span):
+                integration.llmobs_set_tags(
+                    span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat"
+                )
+            span.finish()
 
 
-@with_traced_module
-async def traced_acompletion(litellm, pin, func, instance, args, kwargs):
+async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_completion):
     integration = litellm._datadog_integration
     model = get_argument_value(args, kwargs, 0, "model", None)
     host = None
@@ -54,15 +85,36 @@ async def traced_acompletion(litellm, pin, func, instance, args, kwargs):
         func.__name__,
         model=model,
         host=host,
-        submit_to_llmobs=False,
+        submit_to_llmobs=integration.should_submit_to_llmobs(kwargs, model),
     )
+    stream = kwargs.get("stream", False)
+    resp = None
     try:
-        return await func(*args, **kwargs)
+        resp = await func(*args, **kwargs)
+        if stream:
+            return TracedLiteLLMAsyncStream(resp, integration, span, kwargs, is_completion)
+        return resp
     except Exception:
         span.set_exc_info(*sys.exc_info())
         raise
     finally:
-        span.finish()
+        # streamed spans will be finished separately once the stream generator is exhausted
+        if not stream:
+            if integration.is_pc_sampled_llmobs(span):
+                integration.llmobs_set_tags(
+                    span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat"
+                )
+            span.finish()
+
+
+@with_traced_module
+def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs):
+    requested_model = get_argument_value(args, kwargs, 0, "model", None)
+    integration = litellm._datadog_integration
+    model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs)
+    # store the model name and provider in the integration
+    integration._model_map[requested_model] = (model, custom_llm_provider)
+    return model, custom_llm_provider, dynamic_api_key, api_base
 
 
 def patch():
@@ -77,8 +129,10 @@ def patch():
 
     wrap("litellm", "completion", traced_completion(litellm))
     wrap("litellm", "acompletion", traced_acompletion(litellm))
-    wrap("litellm", "text_completion", traced_completion(litellm))
-    wrap("litellm", "atext_completion", traced_acompletion(litellm))
+    wrap("litellm", "text_completion", traced_text_completion(litellm))
+    wrap("litellm", "atext_completion", traced_atext_completion(litellm))
+    wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm))
+    wrap("litellm", "main.get_llm_provider", traced_get_llm_provider(litellm))
 
 
 def unpatch():
@@ -91,5 +145,7 @@ def unpatch():
     unwrap(litellm, "acompletion")
     unwrap(litellm, "text_completion")
     unwrap(litellm, "atext_completion")
+    unwrap(litellm, "get_llm_provider")
+    unwrap(litellm.main, "get_llm_provider")
 
     delattr(litellm, "_datadog_integration")
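A note for readers outside LiteLLM internals: `get_llm_provider` is the LiteLLM routine that resolves a requested model string into a `(model, custom_llm_provider, dynamic_api_key, api_base)` tuple, which is why the wrapper added above caches its result per requested model. A minimal sketch of the behavior the patch relies on; the model string and resolved values here are illustrative assumptions, not output captured from this patch:

```python
import litellm

# get_llm_provider("azure/gpt-4o") resolves the prefixed model string to a
# concrete model and provider; the traced wrapper in this patch stores
# {requested_model: (model, provider)} on the integration so finished spans
# can be tagged with the resolved pair.
model, provider, _api_key, _api_base = litellm.get_llm_provider("azure/gpt-4o")
print(model, provider)  # expected roughly: gpt-4o azure
```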
diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py
new file mode 100644
index 00000000000..11b996891a4
--- /dev/null
+++ b/ddtrace/contrib/internal/litellm/utils.py
@@ -0,0 +1,129 @@
+import sys
+
+import wrapt
+
+from ddtrace.internal.logger import get_logger
+from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
+from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
+
+
+log = get_logger(__name__)
+
+
+class BaseTracedLiteLLMStream(wrapt.ObjectProxy):
+    def __init__(self, wrapped, integration, span, kwargs, is_completion=False):
+        super().__init__(wrapped)
+        n = kwargs.get("n", 1) or 1
+        self._dd_integration = integration
+        self._dd_span = span
+        self._kwargs = kwargs
+        self._streamed_chunks = [[] for _ in range(n)]
+        self._is_completion = is_completion
+
+
+class TracedLiteLLMStream(BaseTracedLiteLLMStream):
+    def __enter__(self):
+        self.__wrapped__.__enter__()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+    def __iter__(self):
+        try:
+            for chunk in self.__wrapped__:
+                yield chunk
+                _loop_handler(chunk, self._streamed_chunks)
+        except Exception:
+            self._dd_span.set_exc_info(*sys.exc_info())
+            raise
+        finally:
+            _process_finished_stream(
+                self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
+            )
+            self._dd_span.finish()
+
+    def __next__(self):
+        try:
+            chunk = self.__wrapped__.__next__()
+            _loop_handler(chunk, self._streamed_chunks)
+            return chunk
+        except StopIteration:
+            raise
+        except Exception:
+            self._dd_span.set_exc_info(*sys.exc_info())
+            raise
+        finally:
+            _process_finished_stream(
+                self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
+            )
+            self._dd_span.finish()
+
+
+class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream):
+    async def __aenter__(self):
+        await self.__wrapped__.__aenter__()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb)
+
+    async def __aiter__(self):
+        try:
+            async for chunk in self.__wrapped__:
+                yield chunk
+                _loop_handler(chunk, self._streamed_chunks)
+        except Exception:
+            self._dd_span.set_exc_info(*sys.exc_info())
+            raise
+        finally:
+            _process_finished_stream(
+                self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
+            )
+            self._dd_span.finish()
+
+    async def __anext__(self):
+        try:
+            chunk = await self.__wrapped__.__anext__()
+            _loop_handler(chunk, self._streamed_chunks)
+            return chunk
+        except StopAsyncIteration:
+            raise
+        except Exception:
+            self._dd_span.set_exc_info(*sys.exc_info())
+            raise
+        finally:
+            _process_finished_stream(
+                self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
+            )
+            self._dd_span.finish()
+
+
+def _loop_handler(chunk, streamed_chunks):
+    """Appends the chunk to the correct index in the streamed_chunks list.
+
+    When handling a streamed chat/completion response, this function is called for each chunk in the streamed response.
+    """
+    for choice in chunk.choices:
+        streamed_chunks[choice.index].append(choice)
+    if getattr(chunk, "usage", None):
+        streamed_chunks[0].insert(0, chunk)
+
+
+def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_completion=False):
+    try:
+        if is_completion:
+            formatted_completions = [
+                openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks
+            ]
+        else:
+            formatted_completions = [
+                openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks
+            ]
+        operation = "completion" if is_completion else "chat"
+        if integration.is_pc_sampled_llmobs(span):
+            integration.llmobs_set_tags(
+                span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation
+            )
+    except Exception:
+        log.warning("Error processing streamed completion/chat response.", exc_info=True)
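The per-choice bucketing done by `_loop_handler` above is easiest to see with a toy stream. This standalone sketch mimics the shape of streamed chunks with `SimpleNamespace` (an illustration only; the real inputs are LiteLLM chunk objects):

```python
from types import SimpleNamespace

# Two choices (n=2) arrive interleaved; _loop_handler buckets each streamed
# choice by its index so every choice can be reassembled independently.
chunks = [
    SimpleNamespace(choices=[SimpleNamespace(index=0, text="Hel")], usage=None),
    SimpleNamespace(choices=[SimpleNamespace(index=1, text="Hi!")], usage=None),
    SimpleNamespace(choices=[SimpleNamespace(index=0, text="lo")], usage=None),
]
streamed_chunks = [[] for _ in range(2)]
for chunk in chunks:
    for choice in chunk.choices:
        streamed_chunks[choice.index].append(choice)
print("".join(c.text for c in streamed_chunks[0]))  # -> Hello
print("".join(c.text for c in streamed_chunks[1]))  # -> Hi!
```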
diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py
index c421d57c74c..dca02cb8ed9 100644
--- a/ddtrace/contrib/internal/openai/utils.py
+++ b/ddtrace/contrib/internal/openai/utils.py
@@ -1,14 +1,13 @@
 import re
 import sys
-from typing import Any
 from typing import AsyncGenerator
-from typing import Dict
 from typing import Generator
-from typing import List
 
 import wrapt
 
 from ddtrace.internal.logger import get_logger
+from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
+from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
 from ddtrace.llmobs._utils import _get_attr
@@ -265,9 +264,13 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp
     request_messages = kwargs.get("messages", None)
     try:
         if is_completion:
-            formatted_completions = [_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks]
+            formatted_completions = [
+                openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks
+            ]
         else:
-            formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks]
+            formatted_completions = [
+                openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks
+            ]
         if integration.is_pc_sampled_span(span):
             _tag_streamed_response(integration, span, formatted_completions)
             _set_token_metrics(span, formatted_completions, prompts, request_messages, kwargs)
@@ -277,82 +280,6 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp
         log.warning("Error processing streamed completion/chat response.", exc_info=True)
 
 
-def _construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]:
-    """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks."""
-    if not streamed_chunks:
-        return {"text": ""}
-    completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))}
-    if streamed_chunks[-1].finish_reason is not None:
-        completion["finish_reason"] = streamed_chunks[-1].finish_reason
-    if hasattr(streamed_chunks[0], "usage"):
-        completion["usage"] = streamed_chunks[0].usage
-    return completion
-
-
-def _construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None):
-    """Builds a tool_call dictionary from streamed function_call/tool_call chunks."""
-    if function_call_chunk:
-        if not stored_tool_calls:
-            stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""})
-        stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "")
-        return
-    if not tool_call_chunk:
-        return
-    tool_call_idx = getattr(tool_call_chunk, "index", None)
-    tool_id = getattr(tool_call_chunk, "id", None)
-    tool_type = getattr(tool_call_chunk, "type", None)
-    function_call = getattr(tool_call_chunk, "function", None)
-    function_name = getattr(function_call, "name", "")
-    # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0)
-    list_idx = next(
-        (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx),
-        None,
-    )
-    if list_idx is None:
-        stored_tool_calls.append(
-            {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type}
-        )
-        list_idx = -1
-    stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "")
-
-
-def _construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]:
-    """Constructs a chat completion message dictionary from streamed chunks.
-    The resulting message dictionary is of form:
-    {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."}
-    """
-    message = {"content": "", "tool_calls": []}
-    for chunk in streamed_chunks:
-        if getattr(chunk, "usage", None):
-            message["usage"] = chunk.usage
-        if not hasattr(chunk, "delta"):
-            continue
-        if getattr(chunk, "index", None) and not message.get("index"):
-            message["index"] = chunk.index
-        if getattr(chunk.delta, "role") and not message.get("role"):
-            message["role"] = chunk.delta.role
-        if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"):
-            message["finish_reason"] = chunk.finish_reason
-        chunk_content = getattr(chunk.delta, "content", "")
-        if chunk_content:
-            message["content"] += chunk_content
-            continue
-        function_call = getattr(chunk.delta, "function_call", None)
-        if function_call:
-            _construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call)
-        tool_calls = getattr(chunk.delta, "tool_calls", None)
-        if not tool_calls:
-            continue
-        for tool_call in tool_calls:
-            _construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call)
-    if message["tool_calls"]:
-        message["tool_calls"].sort(key=lambda x: x.get("index", 0))
-    else:
-        message.pop("tool_calls", None)
-    message["content"] = message["content"].strip()
-    return message
-
-
 def _tag_streamed_response(integration, span, completions_or_messages=None):
     """Tagging logic for streamed completions and chat completions."""
     for idx, choice in enumerate(completions_or_messages):
diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py
index 5391ee80be4..80107500afe 100644
--- a/ddtrace/llmobs/_integrations/litellm.py
+++ b/ddtrace/llmobs/_integrations/litellm.py
@@ -1,13 +1,29 @@
 from typing import Any
 from typing import Dict
+from typing import List
 from typing import Optional
+from typing import Tuple
 
+from ddtrace.internal.utils import get_argument_value
+from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import METRICS
+from ddtrace.llmobs._constants import MODEL_NAME
+from ddtrace.llmobs._constants import MODEL_PROVIDER
+from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import SPAN_KIND
+from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._integrations.base import BaseLLMIntegration
+from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_chat
+from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_completion
+from ddtrace.llmobs._llmobs import LLMObs
+from ddtrace.llmobs._utils import _get_attr
 from ddtrace.trace import Span
 
 
 class LiteLLMIntegration(BaseLLMIntegration):
     _integration_name = "litellm"
+    # maps requested model name to parsed model name and provider
+    _model_map: Dict[str, Tuple[str, str]] = {}
 
     def _set_base_span_tags(
         self, span: Span, model: Optional[str] = None, host: Optional[str] = None, **kwargs: Dict[str, Any]
@@ -16,3 +32,65 @@ def _set_base_span_tags(
         span.set_tag_str("litellm.request.model", model)
         if host is not None:
             span.set_tag_str("litellm.request.host", host)
+
+    def _llmobs_set_tags(
+        self,
+        span: Span,
+        args: List[Any],
+        kwargs: Dict[str, Any],
+        response: Optional[Any] = None,
+        operation: str = "",
+    ) -> None:
+        model_name = get_argument_value(args, kwargs, 0, "model", False) or ""
+        model_name, model_provider = self._model_map.get(model_name, (model_name, ""))
+
+        # use OpenAI helpers since the response format will match OpenAI's
+        if operation == "completion":
+            openai_set_meta_tags_from_completion(span, kwargs, response)
+        else:
+            openai_set_meta_tags_from_chat(span, kwargs, response)
+
+        metrics = self._extract_llmobs_metrics(response)
+        span._set_ctx_items(
+            {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider, METRICS: metrics}
+        )
+
+    @staticmethod
+    def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]:
+        if not resp:
+            return {}
+        if isinstance(resp, list):
+            token_usage = _get_attr(resp[0], "usage", None)
+        else:
+            token_usage = _get_attr(resp, "usage", None)
+        if token_usage is None:
+            return {}
+        prompt_tokens = _get_attr(token_usage, "prompt_tokens", 0)
+        completion_tokens = _get_attr(token_usage, "completion_tokens", 0)
+        return {
+            INPUT_TOKENS_METRIC_KEY: prompt_tokens,
+            OUTPUT_TOKENS_METRIC_KEY: completion_tokens,
+            TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
+        }
+
+    def should_submit_to_llmobs(self, kwargs: Dict[str, Any], model: Optional[str] = None) -> bool:
+        """
+        A span should NOT be submitted to LLMObs if:
+        - base_url is not None: this is a proxy request and we will capture the LLM request downstream
+        - the request is non-streamed, the model provider is OpenAI/AzureOpenAI, and the OpenAI integration
+          is enabled: this request will be captured in the OpenAI integration instead
+        """
+        base_url = kwargs.get("api_base", None)
+        if base_url is not None:
+            return False
+        stream = kwargs.get("stream", False)
+        model_lower = model.lower() if model else ""
+        # model provider is unknown until request completes; therefore, this is a best effort attempt to check
+        # if model provider is OpenAI or Azure
+        if (
+            any(prefix in model_lower for prefix in ("gpt", "openai", "azure"))
+            and not stream
+            and LLMObs._integration_is_enabled("openai")
+        ):
+            return False
+        return True
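Since `should_submit_to_llmobs` encodes the deduplication rules for this integration, here is the same decision table as a standalone sketch; the `openai_integration_enabled` flag stands in for `LLMObs._integration_is_enabled("openai")`:

```python
def should_submit_to_llmobs(kwargs, model=None, openai_integration_enabled=True):
    # Proxy requests (an explicit api_base) are captured downstream instead.
    if kwargs.get("api_base") is not None:
        return False
    # Non-streamed OpenAI/Azure requests are already captured by the OpenAI
    # integration when it is enabled, so skip them to avoid duplicate spans.
    model_lower = (model or "").lower()
    looks_like_openai = any(p in model_lower for p in ("gpt", "openai", "azure"))
    if looks_like_openai and not kwargs.get("stream", False) and openai_integration_enabled:
        return False
    return True


print(should_submit_to_llmobs({"stream": True}, "gpt-4"))  # True: streamed requests stay here
print(should_submit_to_llmobs({}, "gpt-4"))  # False: the OpenAI integration will capture it
print(should_submit_to_llmobs({"api_base": "http://0.0.0.0:4000"}, "claude-3"))  # False: proxy request
```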
diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py
index 98f6123cc5a..aebe8ae3207 100644
--- a/ddtrace/llmobs/_integrations/openai.py
+++ b/ddtrace/llmobs/_integrations/openai.py
@@ -1,24 +1,17 @@
-import json
 from typing import Any
 from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Tuple
 
-from ddtrace.internal import core
 from ddtrace.internal.constants import COMPONENT
-from ddtrace.internal.utils.formats import format_trace_id
 from ddtrace.internal.utils.version import parse_version
-from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
-from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
 from ddtrace.llmobs._constants import INPUT_DOCUMENTS
-from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import METRICS
 from ddtrace.llmobs._constants import MODEL_NAME
 from ddtrace.llmobs._constants import MODEL_PROVIDER
-from ddtrace.llmobs._constants import OUTPUT_MESSAGES
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import OUTPUT_VALUE
 from ddtrace.llmobs._constants import SPAN_KIND
@@ -26,6 +19,8 @@
 from ddtrace.llmobs._integrations.base import BaseLLMIntegration
 from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
 from ddtrace.llmobs._integrations.utils import is_openai_default_base_url
+from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_chat
+from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_completion
 from ddtrace.llmobs._utils import _get_attr
 from ddtrace.llmobs.utils import Document
 from ddtrace.trace import Pin
@@ -128,9 +123,9 @@ def _llmobs_set_tags(
             model_provider = "deepseek"
 
         if operation == "completion":
-            self._llmobs_set_meta_tags_from_completion(span, kwargs, response)
+            openai_set_meta_tags_from_completion(span, kwargs, response)
         elif operation == "chat":
-            self._llmobs_set_meta_tags_from_chat(span, kwargs, response)
+            openai_set_meta_tags_from_chat(span, kwargs, response)
         elif operation == "embedding":
             self._llmobs_set_meta_tags_from_embedding(span, kwargs, response)
         metrics = self._extract_llmobs_metrics_tags(span, response)
@@ -138,102 +133,6 @@ def _llmobs_set_tags(
             {SPAN_KIND: span_kind, MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider, METRICS: metrics}
         )
 
-    @staticmethod
-    def _llmobs_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None:
-        """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.meta.*" tags."""
-        prompt = kwargs.get("prompt", "")
-        if isinstance(prompt, str):
-            prompt = [prompt]
-        parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt")}
-        output_messages = [{"content": ""}]
-        if not span.error and completions:
-            choices = getattr(completions, "choices", completions)
-            output_messages = [{"content": _get_attr(choice, "text", "")} for choice in choices]
-        span._set_ctx_items(
-            {
-                INPUT_MESSAGES: [{"content": str(p)} for p in prompt],
-                METADATA: parameters,
-                OUTPUT_MESSAGES: output_messages,
-            }
-        )
-
-    @staticmethod
-    def _llmobs_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None:
-        """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags."""
-        input_messages = []
-        for m in kwargs.get("messages", []):
-            tool_call_id = m.get("tool_call_id")
-            if tool_call_id:
-                core.dispatch(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, (tool_call_id, span))
-            input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))})
-        parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions")}
-        span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters})
-
-        if span.error or not messages:
-            span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}])
-            return
-        if isinstance(messages, list):  # streamed response
-            output_messages = []
-            for streamed_message in messages:
-                message = {"content": streamed_message["content"], "role": streamed_message["role"]}
-                tool_calls = streamed_message.get("tool_calls", [])
-                if tool_calls:
-                    message["tool_calls"] = [
-                        {
-                            "name": tool_call.get("name", ""),
-                            "arguments": json.loads(tool_call.get("arguments", "")),
-                            "tool_id": tool_call.get("tool_id", ""),
-                            "type": tool_call.get("type", ""),
-                        }
-                        for tool_call in tool_calls
-                    ]
-                output_messages.append(message)
-            span._set_ctx_item(OUTPUT_MESSAGES, output_messages)
-            return
-        choices = _get_attr(messages, "choices", [])
-        output_messages = []
-        for idx, choice in enumerate(choices):
-            tool_calls_info = []
-            choice_message = _get_attr(choice, "message", {})
-            role = _get_attr(choice_message, "role", "")
-            content = _get_attr(choice_message, "content", "") or ""
-            function_call = _get_attr(choice_message, "function_call", None)
-            if function_call:
-                function_name = _get_attr(function_call, "name", "")
-                arguments = json.loads(_get_attr(function_call, "arguments", ""))
-                function_call_info = {"name": function_name, "arguments": arguments}
-                output_messages.append({"content": content, "role": role, "tool_calls": [function_call_info]})
-                continue
-            tool_calls = _get_attr(choice_message, "tool_calls", []) or []
-            for tool_call in tool_calls:
-                tool_args = getattr(tool_call.function, "arguments", "")
-                tool_name = getattr(tool_call.function, "name", "")
-                tool_id = getattr(tool_call, "id", "")
-                tool_call_info = {
-                    "name": tool_name,
-                    "arguments": json.loads(tool_args),
-                    "tool_id": tool_id,
-                    "type": "function",
-                }
-                tool_calls_info.append(tool_call_info)
-                core.dispatch(
-                    DISPATCH_ON_LLM_TOOL_CHOICE,
-                    (
-                        tool_id,
-                        tool_name,
-                        tool_args,
-                        {
-                            "trace_id": format_trace_id(span.trace_id),
-                            "span_id": str(span.span_id),
-                        },
-                    ),
-                )
-            if tool_calls_info:
-                output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info})
-                continue
-            output_messages.append({"content": content, "role": role})
-        span._set_ctx_item(OUTPUT_MESSAGES, output_messages)
-
     @staticmethod
     def _llmobs_set_meta_tags_from_embedding(span: Span, kwargs: Dict[str, Any], resp: Any) -> None:
         """Extract prompt tags from an embedding and set them as temporary "_ml_obs.meta.*" tags."""
diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index ef6428fd9b9..ef7cc62ff7e 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -11,9 +11,17 @@
 from typing import Union
 from urllib.parse import urlparse
 
+from ddtrace._trace.span import Span
+from ddtrace.internal import core
 from ddtrace.internal.logger import get_logger
+from ddtrace.internal.utils.formats import format_trace_id
+from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
+from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
+from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG
+from ddtrace.llmobs._constants import OUTPUT_MESSAGES
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._utils import _get_attr
@@ -284,6 +292,187 @@ def get_messages_from_converse_content(role: str, content: list):
     return messages
 
 
+def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None:
+    """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.meta.*" tags."""
+    prompt = kwargs.get("prompt", "")
+    if isinstance(prompt, str):
+        prompt = [prompt]
+    parameters = {
+        k: v for k, v in kwargs.items() if k not in ("model", "prompt", "api_key", "user_api_key", "user_api_key_hash")
+    }
+    output_messages = [{"content": ""}]
+    if not span.error and completions:
+        choices = getattr(completions, "choices", completions)
+        output_messages = [{"content": _get_attr(choice, "text", "")} for choice in choices]
+    span._set_ctx_items(
+        {
+            INPUT_MESSAGES: [{"content": str(p)} for p in prompt],
+            METADATA: parameters,
+            OUTPUT_MESSAGES: output_messages,
+        }
+    )
+
+
+def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None:
+    """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags."""
+    input_messages = []
+    for m in kwargs.get("messages", []):
+        tool_call_id = m.get("tool_call_id")
+        if tool_call_id:
+            core.dispatch(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, (tool_call_id, span))
+        input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))})
+    parameters = {
+        k: v
+        for k, v in kwargs.items()
+        if k not in ("model", "messages", "tools", "functions", "api_key", "user_api_key", "user_api_key_hash")
+    }
+    span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters})
+
+    if span.error or not messages:
+        span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}])
+        return
+    if isinstance(messages, list):  # streamed response
+        role = ""
+        output_messages = []
+        for streamed_message in messages:
+            # litellm roles appear only on the first choice, so store it to be used for all choices
+            role = streamed_message.get("role", "") or role
+            message = {"content": streamed_message.get("content", ""), "role": role}
+            tool_calls = streamed_message.get("tool_calls", [])
+            if tool_calls:
+                message["tool_calls"] = [
+                    {
+                        "name": tool_call.get("name", ""),
+                        "arguments": json.loads(tool_call.get("arguments", "")),
+                        "tool_id": tool_call.get("tool_id", ""),
+                        "type": tool_call.get("type", ""),
+                    }
+                    for tool_call in tool_calls
+                ]
+            output_messages.append(message)
+        span._set_ctx_item(OUTPUT_MESSAGES, output_messages)
+        return
+    choices = _get_attr(messages, "choices", [])
+    output_messages = []
+    for idx, choice in enumerate(choices):
+        tool_calls_info = []
+        choice_message = _get_attr(choice, "message", {})
+        role = _get_attr(choice_message, "role", "")
+        content = _get_attr(choice_message, "content", "") or ""
+        function_call = _get_attr(choice_message, "function_call", None)
+        if function_call:
+            function_name = _get_attr(function_call, "name", "")
+            arguments = json.loads(_get_attr(function_call, "arguments", ""))
+            function_call_info = {"name": function_name, "arguments": arguments}
+            output_messages.append({"content": content, "role": role, "tool_calls": [function_call_info]})
+            continue
+        tool_calls = _get_attr(choice_message, "tool_calls", []) or []
+        for tool_call in tool_calls:
+            tool_args = getattr(tool_call.function, "arguments", "")
+            tool_name = getattr(tool_call.function, "name", "")
+            tool_id = getattr(tool_call, "id", "")
+            tool_call_info = {
+                "name": tool_name,
+                "arguments": json.loads(tool_args),
+                "tool_id": tool_id,
+                "type": "function",
+            }
+            tool_calls_info.append(tool_call_info)
+            core.dispatch(
+                DISPATCH_ON_LLM_TOOL_CHOICE,
+                (
+                    tool_id,
+                    tool_name,
+                    tool_args,
+                    {
+                        "trace_id": format_trace_id(span.trace_id),
+                        "span_id": str(span.span_id),
+                    },
+                ),
+            )
+        if tool_calls_info:
+            output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info})
+            continue
+        output_messages.append({"content": content, "role": role})
+    span._set_ctx_item(OUTPUT_MESSAGES, output_messages)
+
+
+def openai_construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]:
+    """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks."""
+    if not streamed_chunks:
+        return {"text": ""}
+    completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))}
+    if streamed_chunks[-1].finish_reason is not None:
+        completion["finish_reason"] = streamed_chunks[-1].finish_reason
+    if hasattr(streamed_chunks[0], "usage"):
+        completion["usage"] = streamed_chunks[0].usage
+    return completion
+
+
+def openai_construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None):
+    """Builds a tool_call dictionary from streamed function_call/tool_call chunks."""
+    if function_call_chunk:
+        if not stored_tool_calls:
+            stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""})
+        stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "")
+        return
+    if not tool_call_chunk:
+        return
+    tool_call_idx = getattr(tool_call_chunk, "index", None)
+    tool_id = getattr(tool_call_chunk, "id", None)
+    tool_type = getattr(tool_call_chunk, "type", None)
+    function_call = getattr(tool_call_chunk, "function", None)
+    function_name = getattr(function_call, "name", "")
+    # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0)
+    list_idx = next(
+        (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx),
+        None,
+    )
+    if list_idx is None:
+        stored_tool_calls.append(
+            {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type}
+        )
+        list_idx = -1
+    stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "")
+
+
+def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, Any]:
+    """Constructs a chat completion message dictionary from streamed chunks.
+    The resulting message dictionary is of form:
+    {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."}
+    """
+    message: Dict[str, Any] = {"content": "", "tool_calls": []}
+    for chunk in streamed_chunks:
+        if getattr(chunk, "usage", None):
+            message["usage"] = chunk.usage
+        if not hasattr(chunk, "delta"):
+            continue
+        if getattr(chunk, "index", None) and not message.get("index"):
+            message["index"] = chunk.index
+        if getattr(chunk.delta, "role") and not message.get("role"):
+            message["role"] = chunk.delta.role
+        if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"):
+            message["finish_reason"] = chunk.finish_reason
+        chunk_content = getattr(chunk.delta, "content", "")
+        if chunk_content:
+            message["content"] += chunk_content
+            continue
+        function_call = getattr(chunk.delta, "function_call", None)
+        if function_call:
+            openai_construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call)
+        tool_calls = getattr(chunk.delta, "tool_calls", None)
+        if not tool_calls:
+            continue
+        for tool_call in tool_calls:
+            openai_construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call)
+    if message["tool_calls"]:
+        message["tool_calls"].sort(key=lambda x: x.get("index", 0))
+    else:
+        message.pop("tool_calls", None)
+    message["content"] = message["content"].strip()
+    return message
+
+
 class OaiSpanAdapter:
     """Adapter for Oai Agents SDK Span objects that the llmobs integration code will use.
 
     This is to consolidate the code where we access oai library types which provides a clear starting point for
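To make the moved helpers concrete, here is a rough illustration of what `openai_construct_message_from_streamed_chunks` returns for a two-chunk delta stream; the chunk objects are faked with `SimpleNamespace` (the real inputs are OpenAI/LiteLLM streamed choice objects):

```python
from types import SimpleNamespace

from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks

# Fake streamed choices: a role-bearing first delta, then a content delta that
# also carries the finish_reason, mirroring an OpenAI chat completion stream.
chunks = [
    SimpleNamespace(delta=SimpleNamespace(role="assistant", content="Hello", function_call=None, tool_calls=None)),
    SimpleNamespace(
        delta=SimpleNamespace(role=None, content=" world", function_call=None, tool_calls=None),
        finish_reason="stop",
    ),
]
print(openai_construct_message_from_streamed_chunks(chunks))
# -> {'content': 'Hello world', 'role': 'assistant', 'finish_reason': 'stop'}
```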
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 7c3a35f8e18..57c3e1b300b 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -90,6 +90,7 @@
     "google_generativeai": "google_generativeai",
     "vertexai": "vertexai",
     "langgraph": "langgraph",
+    "litellm": "litellm",
     "crewai": "crewai",
     "openai_agents": "openai_agents",
 }
diff --git a/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml b/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml
new file mode 100644
index 00000000000..5c88002a4a4
--- /dev/null
+++ b/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    LLM Observability: Adds support for automatically submitting LiteLLM SDK requests to LLM Observability.
\ No newline at end of file
diff --git a/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml b/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml
new file mode 100644
index 00000000000..7641313c6a3
--- /dev/null
+++ b/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml
@@ -0,0 +1,133 @@
+interactions:
+- request:
+    body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '89'
+      content-type:
+      - application/json
+      host:
+      - 0.0.0.0:4000
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: http://0.0.0.0:4000/chat/completions
+  response:
+    body:
+      string: '{"id":"chatcmpl-BHGBewTYXrkQYXQ5DUtzsz7lL7gjy","created":1743453498,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":null,"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Not
+        much, just here to help you with anything you need. How can I assist you today?","role":"assistant","tool_calls":null,"function_call":null}},{"finish_reason":"stop","index":1,"message":{"content":"Not
+        much, just here to chat and help with anything you need. How can I assist
+        you today?","role":"assistant","tool_calls":null,"function_call":null}}],"usage":{"completion_tokens":43,"prompt_tokens":13,"total_tokens":56,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}},"service_tier":"default"}'
+    headers:
+      content-length:
+      - '855'
+      content-type:
+      - application/json
+      date:
+      - Mon, 31 Mar 2025 20:38:18 GMT
+      llm_provider-access-control-expose-headers:
+      - X-Request-ID
+      llm_provider-alt-svc:
+      - h3=":443"; ma=86400
+      llm_provider-cf-cache-status:
+      - DYNAMIC
+      llm_provider-cf-ray:
+      - 9292a64f29e7c989-IAD
+      llm_provider-connection:
+      - keep-alive
+      llm_provider-content-encoding:
+      - gzip
+      llm_provider-content-type:
+      - application/json
+      llm_provider-date:
+      - Mon, 31 Mar 2025 20:38:19 GMT
+      llm_provider-openai-organization:
+      - datadog-4
+      llm_provider-openai-processing-ms:
+      - '420'
+      llm_provider-openai-version:
+      - '2020-10-01'
+      llm_provider-server:
+      - cloudflare
+      llm_provider-strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      llm_provider-transfer-encoding:
+      - chunked
+      llm_provider-x-content-type-options:
+      - nosniff
+      llm_provider-x-ratelimit-limit-requests:
+      - '15000'
+      llm_provider-x-ratelimit-limit-tokens:
+      - '2000000'
+      llm_provider-x-ratelimit-remaining-requests:
+      - '14999'
+      llm_provider-x-ratelimit-remaining-tokens:
+      - '1999993'
+      llm_provider-x-ratelimit-reset-requests:
+      - 4ms
+      llm_provider-x-ratelimit-reset-tokens:
+      - 0s
+      llm_provider-x-request-id:
+      - req_743e3e93e074d74f8c2dcdaff378a836
+      server:
+      - uvicorn
+      x-litellm-attempted-fallbacks:
+      - '0'
+      x-litellm-attempted-retries:
+      - '0'
+      x-litellm-call-id:
+      - 7c65af0a-51fe-4b5b-8491-d52aeed495c6
+      x-litellm-key-spend:
+      - '0.0'
+      x-litellm-model-api-base:
+      - https://api.openai.com
+      x-litellm-model-group:
+      - gpt-3.5-turbo
+      x-litellm-model-id:
+      - e748de28e0562f80b594285ded0e720cb492b705ea9ad445427dff7c5b722753
+      x-litellm-overhead-duration-ms:
+      - '1.125'
+      x-litellm-response-cost:
+      - '7.099999999999999e-05'
+      x-litellm-response-duration-ms:
+      - '558.881'
+      x-litellm-version:
+      - 1.63.11
+      x-ratelimit-limit-requests:
+      - '15000'
+      x-ratelimit-limit-tokens:
+      - '2000000'
+      x-ratelimit-remaining-requests:
+      - '14999'
+      x-ratelimit-remaining-tokens:
+      - '1999993'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml
new file mode 100644
index 00000000000..ebbbb24714e
--- /dev/null
+++ b/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml
@@ -0,0 +1,107 @@
+interactions:
+- request:
+    body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get
+      the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The
+      city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '487'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A;
+        _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//7FTLbtswELzrK4g924Fkx4/olhRJkNZ12iatkdaBQNNrmTVFEiTVNjD8
+        74UoV5IfBXrooYfqIBA73Nnl7JCbgBDgKcQE2Io6lmvRvbod3X/U4cVlev368eZ89FsjhvcXt0/X
+        w+kUOmWGWnxF5n5lnTGVa4GOK1nBzCB1WLJGo/N+NI76o9ADuUpRlGmZdt3+2aDrCrNQ3TDqDXaZ
+        K8UZWojJl4AQQjb+X/YoU/wBMfE8PpKjtTRDiOtNhIBRoowAtZZbR6WDTgMyJR3Ksm1ZCNECnFIi
+        YVSIpnD1bVrrRigqRPL+avbm3fhusFZTFn64fzsYDqOUPdhWvYr6RfuGloVktUAtvI7HB8UIAUlz
+        n5uhS1hhDEqXfEfqVmgOaAgBarIiR+nKI8BmDkIxWhLPIZ7DA5XkxlDJuGWqQ15dzmELewzb4NT6
+        uSWSwWVhqThWj0qpnK/l5XveIdt6UkJl2qiFPUiFJZfcrhKD1HoB2nMIWhRHJoj+FRPcjacLrj+H
+        T3z4ic/EYzRDOplM/pvgb5og2LUAxd6oQRuVa5c4tUZfdDyoSKF5jhqwP96BTjkqmnjU63dO0CUp
+        Osq9FWr3McpWmDapzVNEi5SrFtA273E3p7ir43OZ/Ql9AzCG2mGaaIMpZ/snbrYZLF/r322rRfYN
+        g0XzjTNMHEdTjiPFJS1EdYXAvliHebLkMkOjDa/vUbANfgIAAP//AwBATheKSwYAAA==
+    headers:
+      CF-RAY:
+      - 9278b28d69eb3ba6-BOS
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 28 Mar 2025 17:02:51 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - datadog-4
+      openai-processing-ms:
+      - '540'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '15000'
+      x-ratelimit-limit-tokens:
+      - '2000000'
+      x-ratelimit-remaining-requests:
+      - '14999'
+      x-ratelimit-remaining-tokens:
+      - '1999985'
+      x-ratelimit-reset-requests:
+      - 4ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_7f2afa7681587d4ae31ca5d9d75824f0
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/contrib/litellm/cassettes/completion_proxy.yaml b/tests/contrib/litellm/cassettes/completion_proxy.yaml
new file mode 100644
index 00000000000..15e7ea403f3
--- /dev/null
+++ b/tests/contrib/litellm/cassettes/completion_proxy.yaml
@@ -0,0 +1,132 @@
+interactions:
+- request:
+    body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '89'
+      content-type:
+      - application/json
+      host:
+      - 0.0.0.0:4000
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: http://0.0.0.0:4000/chat/completions
+  response:
+    body:
+      string: '{"id":"chatcmpl-BHGAf6WA7lmIL9yuwftXYqc5kADAy","created":1743453437,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":null,"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Not
+        much, just here to help with any questions or tasks you may have. How can
+        I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"usage":{"completion_tokens":24,"prompt_tokens":13,"total_tokens":37,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}},"service_tier":"default"}'
+    headers:
+      content-length:
+      - '663'
+      content-type:
+      - application/json
+      date:
+      - Mon, 31 Mar 2025 20:37:16 GMT
+      llm_provider-access-control-expose-headers:
+      - X-Request-ID
+      llm_provider-alt-svc:
+      - h3=":443"; ma=86400
+      llm_provider-cf-cache-status:
+      - DYNAMIC
+      llm_provider-cf-ray:
+      - 9292a4d20e95057d-IAD
+      llm_provider-connection:
+      - keep-alive
+      llm_provider-content-encoding:
+      - gzip
+      llm_provider-content-type:
+      - application/json
+      llm_provider-date:
+      - Mon, 31 Mar 2025 20:37:18 GMT
+      llm_provider-openai-organization:
+      - datadog-4
+      llm_provider-openai-processing-ms:
+      - '406'
+      llm_provider-openai-version:
+      - '2020-10-01'
+      llm_provider-server:
+      - cloudflare
+      llm_provider-strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      llm_provider-transfer-encoding:
+      - chunked
+      llm_provider-x-content-type-options:
+      - nosniff
+      llm_provider-x-ratelimit-limit-requests:
+      - '15000'
+      llm_provider-x-ratelimit-limit-tokens:
+      - '2000000'
+      llm_provider-x-ratelimit-remaining-requests:
+      - '14999'
+      llm_provider-x-ratelimit-remaining-tokens:
+      - '1999994'
+      llm_provider-x-ratelimit-reset-requests:
+      - 4ms
+      llm_provider-x-ratelimit-reset-tokens:
+      - 0s
+      llm_provider-x-request-id:
+      - req_6fdefc3db6a6e5b77dae976930efe649
+      server:
+      - uvicorn
+      x-litellm-attempted-fallbacks:
+      - '0'
+      x-litellm-attempted-retries:
+      - '0'
+      x-litellm-call-id:
+      - 30958a62-d9f7-47e6-8971-2b58852f2976
+      x-litellm-key-spend:
+      - '0.0'
+      x-litellm-model-api-base:
+      - https://api.openai.com
+      x-litellm-model-group:
+      - gpt-3.5-turbo
+      x-litellm-model-id:
+      - e748de28e0562f80b594285ded0e720cb492b705ea9ad445427dff7c5b722753
+      x-litellm-overhead-duration-ms:
+      - '1.424'
+      x-litellm-response-cost:
+      - '4.25e-05'
+      x-litellm-response-duration-ms:
+      - '611.3'
+      x-litellm-version:
+      - 1.63.11
+      x-ratelimit-limit-requests:
+      - '15000'
+      x-ratelimit-limit-tokens:
+      - '2000000'
+      x-ratelimit-remaining-requests:
+      - '14999'
+      x-ratelimit-remaining-tokens:
+      - '1999994'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml
new file mode 100644
index 00000000000..7f5f315dbd0
--- /dev/null
+++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml
@@ -0,0 +1,190 @@
+interactions:
+- request:
+    body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false}}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '144'
+      content-type:
+      - application/json
+      cookie:
+      - _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: 'data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        Not"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        much"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        just"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        here"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        to"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        chat"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        and"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        assist"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        you"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        with"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        anything"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        you"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        need"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        How"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        are"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        you"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        doing"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      CF-RAY:
+      - 926fe2d998864ce4-BOS
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Thu, 27 Mar 2025 15:22:57 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=CKfpgOZbNgeO_hZnZwDsP9MQL771OL.QGPQvL7sPRLM-1743088977-1.0.1.1-_AOAMiv0VN3eR0.0l1ZyAhvT8I.sKfG.FnBMJqIAMVU5fFpO4aETM8QMsSGgjjx2dyoOnQ9sOSa6vt2WO_I8dLE2qo4dNe7VwOTDw21Ujrw;
+        path=/; expires=Thu, 27-Mar-25 15:52:57 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=D6vyj85I9udz_8Fd3dvOGdjJWNUTz5W_P_XpI71JrJw-1743088977188-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - datadog-4
+      openai-processing-ms:
+      - '178'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '15000'
+      x-ratelimit-limit-tokens:
+      - '2000000'
+      x-ratelimit-remaining-requests:
+      - '14999'
+      x-ratelimit-remaining-tokens:
+      - '1999994'
+      x-ratelimit-reset-requests:
+      - 4ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_601c6b7020f2cb6a2bb1fbd6d195dabc
    status:
+      code: 200
+      message: OK
+version: 1
{"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + 
can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:18 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - a5a87fc0-874f-4432-b608-91b437b91fb2 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml new file mode 100644 index 00000000000..515680c5d04 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml @@ -0,0 +1,130 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '542' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ty5BH4ChPTiw8GnzCSqhxhoP","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b292789f3ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:51 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '281' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_78940dfd1e163cd37e49e666383b7944 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml new file mode 100644 index 00000000000..3b4b44429c0 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml @@ -0,0 +1,242 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + ready"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 926fe2dd98378f69-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 27 Mar 2025 15:22:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=tMtTAMSYbzl6Mz.ZmEx.t97SoHOEXO_PAGvnES4TErc-1743088977-1.0.1.1-9HXDCBRrHw.0632QNaKGFswPnd4Q7Gcf7tPaifQTEHGv.NOLfayXgXIeHlotH7TAOqyxUdp.KNZ2w43w08vOKnwATLE4VdXkeKJ05zjIvV8; + path=/; expires=Thu, 27-Mar-25 15:52:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=fueXgERk0DK.0YI2CrP74Rvo77MpY9vRD4SXAqqK4S4-1743088977877-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '170' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999993' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_eb315d75e206dc7de5d075296b7b9b6c + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml new file mode 100644 index 00000000000..7cface6a716 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml @@ -0,0 +1,240 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} + + + data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + chat"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + and"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + 
you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + any"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + questions"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + or"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + tasks"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + may"}}],"stream_options":{"include_usage":false}} + + + data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + have"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + today"}}],"stream_options":{"include_usage":false}} + + + data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:19 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - b4f152d1-5074-4fb3-a79d-ad0529fa5aa1 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml new file mode 100644 index 00000000000..fda11e5011a --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml @@ -0,0 +1,162 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '542' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + 
+ data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b2960dbd3ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:52 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '406' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7e8b09694a1029b3eb2fecf93deef4a3 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml 
b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml new file mode 100644 index 00000000000..479f5817089 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml @@ -0,0 +1,186 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: 
{"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + 
+ data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true},"usage":{"completion_tokens":27,"prompt_tokens":13,"total_tokens":40}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:17 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - 7396b03c-8ab2-4593-8e46-a3e1285bd4d4 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml new file mode 100644 index 00000000000..a68aabafd1d --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml @@ -0,0 +1,183 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '541' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ZLW6rwgBZAlsBPJv2F9O3zhz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ZLW6rwgBZAlsBPJv2F9O3zhz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"unit"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"elsius"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":85,"completion_tokens":43,"total_tokens":128,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b28799743ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:50 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=E5.J70d433QZA9Yb..SoyuL46jC1xpIxn4pnxkMjOWc-1743181370-1.0.1.1-bQRVFg.zcyoLYbcsK6DabkiL3ZaPDY.X.mSq2T37uuxnG9X7_mV50crYaQ8tZJdqTZAxOMLe2RYv8mB5jn6GmqPhSgI41BBm4DMMl4lW8FY; + path=/; expires=Fri, 28-Mar-25 17:32:50 GMT; domain=.api.openai.com; HttpOnly; 
+ Secure; SameSite=None + - _cfuvid=wXfAZSge17hyjNvaLx1PDXNyLNpOX59UJ.sov3vRs0U-1743181370249-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '586' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_e2c3786bb1e2c88f639d2f20e45a9e88 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml new file mode 100644 index 00000000000..32e111c1775 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml @@ -0,0 +1,154 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: 
{"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + chat"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + and"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: 
{"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true},"usage":{"completion_tokens":22,"prompt_tokens":13,"total_tokens":35,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:16 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - 8b484c9d-0eae-4d95-8b0e-fe1bdb114b9a + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml new file mode 100644 index 00000000000..a14ca18675e --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml @@ -0,0 +1,151 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '541' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_HuRWIamjJM7bLsbCamjSgf8e","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"unit"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"elsius"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":85,"completion_tokens":24,"total_tokens":109,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b28278948f69-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:49 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=eBHeBQfyYm2koe.GVJdLen4F5mzqqi7jmZT_YvXZOLk-1743181369-1.0.1.1-LT_wEU5NDwWbrNU7lyULsUd_ptgtackPCBbB6I8i.4_taWWP57cHdMtWDz1rfhzKB9f_pKfJzxijQ_Z27_P6iLDT1hf4ioC2b0otZHD3c4o; + path=/; expires=Fri, 
28-Mar-25 17:32:49 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=x0zurymIN4SzRny8DSr5RMnqvVD_AwW_LNnMmnuYuRg-1743181369158-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '295' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999986' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_941396c1f446305a0aed13f33a158719 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_with_tools.yaml new file mode 100644 index 00000000000..78f08f3a0af --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_with_tools.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '487' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFNNj9owEL3nV1hzhlUoIGhusOqu1FIJqQekllVk7CHx4tiRPaFdIf57 + FYdNAkul5hBZ8/zevPnwKWIMlISEgcg5iaLUw+Xz7Msi36xkWcnHldLrxXoZfzt8nW2mPxcwqBl2 + 94qC3lkPwhalRlLWNLBwyAlr1dFsMh7N49k4DkBhJeqalpU0HD9Mh1S5nR3Go0/TCzO3SqCHhP2K + GGPsFP61RyPxDyQs6IRIgd7zDCFpLzEGzuo6Atx75YkbgkEHCmsITW3bVFr3ALJWp4Jr3SVuvlPv + 3DWKa52K9fhwHMvv89flLBOb4zE+ztUkX/XyNdJvZTC0r4xoG9TD23hyk4wxMLwI3AwpFZVzaCj9 + jZxydDcyjAF3WVWgoboEOG1BW8Fr4S0kW/jBDXty3AjlhR2wx8UWznClcI7unV96TXK4rzzXH7vH + jbEUcoX2vVyQczspbbPS2Z2/ocJeGeXz1CH3oQH9OUTvRoIFqK5GDaWzRUkp2QOGpPNpIwrdJnbg + 6PMFJEtc9+LxZHBHLpVIXIVVaLdPcJGj7KjdFvJKKtsDol7pH93c027KVyb7H/kOEAJLQpmWDqUS + 1xV31xzWD/Vf19omB8Pg0R2VwJQUunocEve80s0TAv/mCYt0r0yGrnSqfUfROfoLAAD//wMASyVc + NkYEAAA= + headers: + CF-RAY: + - 9278a2ee88a28ff6-BOS + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 28 Mar 2025 16:52:11 GMT + Server: + - cloudflare + Set-Cookie: + - 
__cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + path=/; expires=Fri, 28-Mar-25 17:22:11 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '397' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999986' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f08846e9b273c1b121279f2a187948dc + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index f972898eaa8..e549ecb8c94 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -1,34 +1,77 @@ +from ddtrace.llmobs._writer import LLMObsSpanWriter import pytest - from ddtrace.contrib.internal.litellm.patch import patch -from ddtrace.contrib.internal.litellm.patch import unpatch from ddtrace.trace import Pin -from tests.contrib.litellm.utils import get_request_vcr +from ddtrace.contrib.internal.litellm.patch import unpatch from tests.utils import DummyTracer -from tests.utils import DummyWriter +from tests.utils import override_global_config +from tests.contrib.litellm.utils import get_request_vcr +from ddtrace.llmobs import LLMObs as llmobs_service +from tests.llmobs._utils import TestLLMObsSpanWriter + +def default_global_config(): + return {} @pytest.fixture -def litellm(monkeypatch): - monkeypatch.setenv("OPENAI_API_KEY", "") - monkeypatch.setenv("ANTHROPIC_API_KEY", "") - monkeypatch.setenv("COHERE_API_KEY", "") - patch() - import litellm +def ddtrace_global_config(): + return {} - yield litellm - unpatch() + +@pytest.fixture +def llmobs_span_writer(): + yield TestLLMObsSpanWriter(is_agentless=True, interval=1.0, timeout=1.0) + + +@pytest.fixture +def llmobs_events(litellm_llmobs, llmobs_span_writer): + return llmobs_span_writer.events + + +@pytest.fixture +def litellm(ddtrace_global_config, monkeypatch): + global_config = default_global_config() + global_config.update(ddtrace_global_config) + with override_global_config(global_config): + monkeypatch.setenv("OPENAI_API_KEY", "") + monkeypatch.setenv("ANTHROPIC_API_KEY", "") + monkeypatch.setenv("COHERE_API_KEY", "") + patch() + import litellm + + yield litellm + unpatch() + + +@pytest.fixture +def litellm_llmobs(mock_tracer, llmobs_span_writer, ddtrace_global_config): + llmobs_service.disable() + with override_global_config( + { + "_llmobs_ml_app": "", + "_dd_api_key": "", + } + ): + enable_integrations = ddtrace_global_config.get("_llmobs_integrations_enabled", False) + llmobs_service.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations) + llmobs_service._instance._llmobs_span_writer = llmobs_span_writer + yield llmobs_service + llmobs_service.disable() @pytest.fixture def mock_tracer(litellm): + mock_tracer = DummyTracer() pin = Pin.get_from(litellm) - 
mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin._override(litellm, tracer=mock_tracer) - pin.tracer.configure() yield mock_tracer @pytest.fixture def request_vcr(): return get_request_vcr() + + +@pytest.fixture +def request_vcr_include_localhost(): + return get_request_vcr(ignore_localhost=False) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 5b1c787ddba..5ddee4173e6 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -33,12 +33,15 @@ def test_litellm_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion"): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - litellm.completion( + resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @@ -46,36 +49,45 @@ async def test_litellm_acompletion(litellm, snapshot_context, request_vcr, strea with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - await litellm.acompletion( + resp = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + async for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) def test_litellm_text_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): - litellm.text_completion( + resp = litellm.text_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) async def test_litellm_atext_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): - await litellm.atext_completion( + resp = await litellm.atext_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + async for _ in resp: + pass @pytest.mark.parametrize("model", ["command-r", "anthropic/claude-3-5-sonnet-20240620"]) diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py new file mode 100644 index 00000000000..e2f243727d5 --- /dev/null +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -0,0 +1,249 @@ +from ddtrace._trace.pin import Pin +from ddtrace.llmobs._llmobs import LLMObs +import pytest + +from tests.contrib.litellm.utils import async_consume_stream +from tests.contrib.litellm.utils import get_cassette_name +from tests.contrib.litellm.utils import consume_stream +from tests.contrib.litellm.utils import parse_response +from tests.contrib.litellm.utils import tools +from tests.llmobs._utils import _expected_llmobs_llm_span_event +from tests.utils import DummyTracer + + +@pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), 
+ (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], +) +class TestLLMObsLiteLLM: + def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + + def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): + if stream and n > 1: + pytest.skip( + "Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977" + ) + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage, tools=True)): + messages = [{"content": "What is the weather like in San Francisco, CA?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + tools=tools, + tool_choice="auto", + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={ + "stream": stream, + "n": n, + "stream_options": {"include_usage": include_usage}, + "tool_choice": "auto", + }, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + + async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = await async_consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + + def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, 
include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + prompt = "Hey, what is up?" + resp = litellm.text_completion( + model="gpt-3.5-turbo", + prompt=prompt, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n, is_completion=True) + else: + output_messages, token_metrics = parse_response(resp, is_completion=True) + + span = mock_tracer.pop_traces()[0][0] + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + + async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + prompt = "Hey, what is up?" + resp = await litellm.atext_completion( + model="gpt-3.5-turbo", + prompt=prompt, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = await async_consume_stream(resp, n, is_completion=True) + else: + output_messages, token_metrics = parse_response(resp, is_completion=True) + + span = mock_tracer.pop_traces()[0][0] + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + + @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)]) + def test_completion_integrations_enabled( + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage + ): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + import openai + + pin = Pin.get_from(openai) + pin._override(openai, tracer=mock_tracer) + + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + spans = mock_tracer.pop_traces() + # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request + if stream: + span = spans[0][0] + metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}} + model_name = "gpt-3.5-turbo" + else: + span = spans[0][1] + # remove parent span since LiteLLM request span will not be submitted to LLMObs + span._parent = None + metadata = { + "n": n, + "extra_body": {}, + "timeout": 600.0, + "extra_headers": {"X-Stainless-Raw-Response": "true"}, + } + model_name = "gpt-3.5-turbo-0125" + assert len(llmobs_events) == 1 + expected_event = _expected_llmobs_llm_span_event( + span, + model_name=model_name, + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata=metadata, + token_metrics=token_metrics, + tags={"ml_app": 
"", "service": "tests.contrib.litellm"}, + ) + assert llmobs_events[0] == expected_event + + def test_completion_proxy( + self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n, include_usage + ): + with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + api_base="http://0.0.0.0:4000", + ) + if stream: + consume_stream(resp, n) + + # client side requests made to the proxy are not submitted to LLMObs + assert len(llmobs_events) == 0 diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index ac43e2d5cf5..a9b6309770b 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -1,7 +1,6 @@ -import os - import vcr - +import os +import json CASETTE_EXTENSION = ".yaml" @@ -9,21 +8,151 @@ # VCR is used to capture and store network requests made to Anthropic. # This is done to avoid making real calls to the API which could introduce # flakiness and cost. -def get_request_vcr(): +def get_request_vcr(ignore_localhost=True): return vcr.VCR( cassette_library_dir=os.path.join(os.path.dirname(__file__), "cassettes"), record_mode="once", match_on=["path"], filter_headers=["authorization", "x-api-key", "api-key"], - # Ignore requests to the agent - ignore_localhost=True, + ignore_localhost=ignore_localhost, ) # Get the name of the cassette to use for a given test # All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n -def get_cassette_name(stream, n): +def get_cassette_name(stream, n, include_usage=True, tools=False, proxy=False): stream_suffix = "_stream" if stream else "" choice_suffix = "_multiple_choices" if n > 1 else "" - return "completion" + stream_suffix + choice_suffix + CASETTE_EXTENSION + # include_usage only affects streamed responses + if stream and not include_usage: + usage_suffix = "_exclude_usage" + else: + usage_suffix = "" + tools_suffix = "_with_tools" if tools else "" + proxy_suffix = "_proxy" if proxy else "" + return "completion" + stream_suffix + choice_suffix + usage_suffix + tools_suffix + proxy_suffix + CASETTE_EXTENSION + + +def consume_stream(resp, n, is_completion=False): + output_messages = [{"content": "", "tool_calls": []} for _ in range(n)] + token_metrics = {} + role = None + for chunk in resp: + output_messages, token_metrics, role = extract_output_from_chunk( + chunk, output_messages, token_metrics, role, is_completion + ) + output_messages = parse_tool_calls(output_messages) + return output_messages, token_metrics + + +async def async_consume_stream(resp, n, is_completion=False): + output_messages = [{"content": "", "tool_calls": []} for _ in range(n)] + token_metrics = {} + role = None + async for chunk in resp: + output_messages, token_metrics, role = extract_output_from_chunk( + chunk, output_messages, token_metrics, role, is_completion + ) + output_messages = parse_tool_calls(output_messages) + return output_messages, token_metrics + + +def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion=False): + for choice in chunk["choices"]: + content = choice["text"] if is_completion else choice["delta"]["content"] + content = content or "" + output_messages[choice.index]["content"] += content + if 
"role" not in output_messages[choice.index] and (choice.get("delta", {}).get("role") or role): + role = choice.get("delta", {}).get("role") or role + output_messages[choice.index]["role"] = role + if choice.get("delta", {}).get("tool_calls", []): + tool_calls_chunk = choice["delta"]["tool_calls"] + for tool_call in tool_calls_chunk: + while tool_call.index >= len(output_messages[choice.index]["tool_calls"]): + output_messages[choice.index]["tool_calls"].append({}) + arguments = output_messages[choice.index]["tool_calls"][tool_call.index].get("arguments", "") + output_messages[choice.index]["tool_calls"][tool_call.index]["name"] = ( + output_messages[choice.index]["tool_calls"][tool_call.index].get("name", None) + or tool_call.function.name + ) + output_messages[choice.index]["tool_calls"][tool_call.index]["arguments"] = ( + arguments + tool_call.function.arguments + ) + output_messages[choice.index]["tool_calls"][tool_call.index]["tool_id"] = ( + output_messages[choice.index]["tool_calls"][tool_call.index].get("tool_id", None) or tool_call.id + ) + output_messages[choice.index]["tool_calls"][tool_call.index]["type"] = tool_call.type + + if "usage" in chunk and chunk["usage"]: + token_metrics.update( + { + "input_tokens": chunk["usage"]["prompt_tokens"], + "output_tokens": chunk["usage"]["completion_tokens"], + "total_tokens": chunk["usage"]["total_tokens"], + } + ) + + return output_messages, token_metrics, role + + +def parse_tool_calls(output_messages): + # remove tool_calls from messages if they are empty and parse arguments + for message in output_messages: + if message["tool_calls"]: + for tool_call in message["tool_calls"]: + if "arguments" in tool_call: + tool_call["arguments"] = json.loads(tool_call["arguments"]) + else: + del message["tool_calls"] + return output_messages + + +def parse_response(resp, is_completion=False): + output_messages = [] + for choice in resp.choices: + content = choice.text if is_completion else choice.message.content + message = {"content": content or ""} + if choice.get("role", None) or choice.get("message", {}).get("role", None): + message["role"] = choice["role"] if is_completion else choice["message"]["role"] + tool_calls = choice.get("message", {}).get("tool_calls", []) + if tool_calls: + message["tool_calls"] = [] + for tool_call in tool_calls: + message["tool_calls"].append( + { + "name": tool_call["function"]["name"], + "arguments": json.loads(tool_call["function"]["arguments"]), + "tool_id": tool_call["id"], + "type": tool_call["type"], + } + ) + output_messages.append(message) + token_metrics = { + "input_tokens": resp.usage.prompt_tokens, + "output_tokens": resp.usage.completion_tokens, + "total_tokens": resp.usage.total_tokens, + } + return output_messages, token_metrics + + +tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } +]