@@ -62,6 +62,15 @@ class InternalChatCompletionMessage(ChatCompletionMessage):
     thinking_blocks: list[dict[str, Any]] | None = None


+class InternalToolCall(ChatCompletionMessageFunctionToolCall):
+    """
+    An internal subclass to carry provider-specific metadata (e.g., Gemini thought signatures)
+    without modifying the original model.
+    """
+
+    extra_content: dict[str, Any] | None = None
+
+
 class LitellmModel(Model):
6675 """This class enables using any model via LiteLLM. LiteLLM allows you to acess OpenAPI,
6776 Anthropic, Gemini, Mistral, and many other models.
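
For reference, here is a minimal sketch (not part of the diff) of how the new `InternalToolCall` subclass carries the metadata. It assumes a recent `openai` package that exports `ChatCompletionMessageFunctionToolCall`; the id and signature values are invented:

```python
from typing import Any

from openai.types.chat import ChatCompletionMessageFunctionToolCall


class InternalToolCall(ChatCompletionMessageFunctionToolCall):
    """Mirrors the subclass added above: a standard tool call plus extra metadata."""

    extra_content: dict[str, Any] | None = None


# Pydantic validates the nested `function` dict into the Function model.
call = InternalToolCall(
    id="call_123",  # invented id
    type="function",
    function={"name": "get_weather", "arguments": '{"city": "Tokyo"}'},
    extra_content={"google": {"thought_signature": "abc123"}},  # invented signature
)

# The extra field round-trips through model_dump(), so downstream code can
# re-wrap or serialize the tool call without losing the signature.
assert call.model_dump()["extra_content"] == {"google": {"thought_signature": "abc123"}}
```
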
@@ -168,9 +177,15 @@ async def get_response(
             "output_tokens": usage.output_tokens,
         }

+        # Build provider_data for provider-specific fields
+        provider_data: dict[str, Any] = {"model": self.model}
+        if message is not None and hasattr(response, "id"):
+            provider_data["response_id"] = response.id
+
         items = (
             Converter.message_to_output_items(
-                LitellmConverter.convert_message_to_openai(message)
+                LitellmConverter.convert_message_to_openai(message, model=self.model),
+                provider_data=provider_data,
             )
             if message is not None
             else []
@@ -215,7 +230,9 @@ async def stream_response(
         )

         final_response: Response | None = None
-        async for chunk in ChatCmplStreamHandler.handle_stream(response, stream):
+        async for chunk in ChatCmplStreamHandler.handle_stream(
+            response, stream, model=self.model
+        ):
             yield chunk

             if chunk.type == "response.completed":
@@ -280,13 +297,19 @@ async def _fetch_response(
         )

         converted_messages = Converter.items_to_messages(
-            input, preserve_thinking_blocks=preserve_thinking_blocks
+            input, model=self.model, preserve_thinking_blocks=preserve_thinking_blocks
         )

         # Fix for interleaved thinking bug: reorder messages to ensure tool_use comes before tool_result  # noqa: E501
         if "anthropic" in self.model.lower() or "claude" in self.model.lower():
             converted_messages = self._fix_tool_message_ordering(converted_messages)

+        # Convert Google's extra_content to litellm's provider_specific_fields format
+        if "gemini" in self.model.lower():
+            converted_messages = self._convert_gemini_extra_content_to_provider_specific_fields(
+                converted_messages
+            )
+
         if system_instructions:
             converted_messages.insert(
                 0,
@@ -436,6 +459,65 @@ async def _fetch_response(
         )
         return response, ret

+    def _convert_gemini_extra_content_to_provider_specific_fields(
+        self, messages: list[ChatCompletionMessageParam]
+    ) -> list[ChatCompletionMessageParam]:
+        """
+        Convert the Gemini extra_content format to litellm's provider_specific_fields format.
+
+        Transforms tool calls from the internal format:
+            extra_content={"google": {"thought_signature": "..."}}
+        to the litellm format:
+            provider_specific_fields={"thought_signature": "..."}
+
+        Only tool_calls that appear after the last user message are processed.
+        See: https://ai.google.dev/gemini-api/docs/thought-signatures
+        """
+
+        # Find the index of the last user message
+        last_user_index = -1
+        for i in range(len(messages) - 1, -1, -1):
+            if isinstance(messages[i], dict) and messages[i].get("role") == "user":
+                last_user_index = i
+                break
+
+        for i, message in enumerate(messages):
+            if not isinstance(message, dict):
+                continue
+
+            # Only process assistant messages that come after the last user message.
+            # If no user message is found (last_user_index == -1), process all messages.
+            if last_user_index != -1 and i <= last_user_index:
+                continue
+
+            # Check whether this is an assistant message with tool calls
+            if message.get("role") == "assistant" and message.get("tool_calls"):
+                tool_calls = message.get("tool_calls", [])
+
+                for tool_call in tool_calls:  # type: ignore[attr-defined]
+                    if not isinstance(tool_call, dict):
+                        continue
+
+                    # Default to skipping the validator; overridden below if a valid
+                    # thought signature exists
+                    tool_call["provider_specific_fields"] = {
+                        "thought_signature": "skip_thought_signature_validator"
+                    }
+
+                    # Override with the actual thought signature if extra_content exists
+                    if "extra_content" in tool_call:
+                        extra_content = tool_call.pop("extra_content")
+                        if isinstance(extra_content, dict):
+                            # Extract Google-specific fields
+                            google_fields = extra_content.get("google")
+                            if google_fields and isinstance(google_fields, dict):
+                                thought_sig = google_fields.get("thought_signature")
+                                if thought_sig:
+                                    tool_call["provider_specific_fields"] = {
+                                        "thought_signature": thought_sig
+                                    }
+
+        return messages
+
     def _fix_tool_message_ordering(
         self, messages: list[ChatCompletionMessageParam]
     ) -> list[ChatCompletionMessageParam]:
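
A worked sketch (not from the diff) of what the conversion does to plain dict messages, given a `LitellmModel` instance `model`; the message contents and signature value are invented:

```python
messages = [
    {"role": "user", "content": "What's the weather in Tokyo?"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "call_123",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "Tokyo"}'},
                # Internal format produced by convert_tool_call_to_openai below
                "extra_content": {"google": {"thought_signature": "abc123"}},
            }
        ],
    },
]

converted = model._convert_gemini_extra_content_to_provider_specific_fields(messages)
tool_call = converted[1]["tool_calls"][0]

# extra_content is popped and its signature re-nested for litellm
assert "extra_content" not in tool_call
assert tool_call["provider_specific_fields"] == {"thought_signature": "abc123"}

# A tool call after the last user message with no extra_content would instead
# get the "skip_thought_signature_validator" placeholder.
```
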
@@ -563,15 +645,26 @@ def _merge_headers(self, model_settings: ModelSettings):
563645class LitellmConverter :
564646 @classmethod
565647 def convert_message_to_openai (
566- cls , message : litellm .types .utils .Message
648+ cls , message : litellm .types .utils .Message , model : str | None = None
567649 ) -> ChatCompletionMessage :
650+ """
651+ Convert a LiteLLM message to OpenAI ChatCompletionMessage format.
652+
653+ Args:
654+ message: The LiteLLM message to convert
655+ model: The target model to convert to. Used to handle provider-specific
656+ transformations.
657+ """
568658 if message .role != "assistant" :
569659 raise ModelBehaviorError (f"Unsupported role: { message .role } " )
570660
571661 tool_calls : (
572662 list [ChatCompletionMessageFunctionToolCall | ChatCompletionMessageCustomToolCall ] | None
573663 ) = (
574- [LitellmConverter .convert_tool_call_to_openai (tool ) for tool in message .tool_calls ]
664+ [
665+ LitellmConverter .convert_tool_call_to_openai (tool , model = model )
666+ for tool in message .tool_calls
667+ ]
575668 if message .tool_calls
576669 else None
577670 )
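
A hedged usage sketch (not from the diff); it assumes `litellm.types.utils.Message` can be constructed directly like this, which may vary across litellm versions:

```python
import litellm

# An assistant message as litellm returns it (content invented for illustration)
msg = litellm.types.utils.Message(role="assistant", content="It's sunny in Tokyo.")

# Passing the model name lets the converter apply Gemini-specific handling
# (id-suffix cleanup, thought-signature preservation) to any tool calls.
openai_msg = LitellmConverter.convert_message_to_openai(msg, model="gemini/gemini-2.5-pro")
assert openai_msg.role == "assistant"
```
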
@@ -641,13 +734,43 @@ def convert_annotations_to_openai(

     @classmethod
     def convert_tool_call_to_openai(
-        cls, tool_call: litellm.types.utils.ChatCompletionMessageToolCall
+        cls, tool_call: litellm.types.utils.ChatCompletionMessageToolCall, model: str | None = None
     ) -> ChatCompletionMessageFunctionToolCall:
-        return ChatCompletionMessageFunctionToolCall(
-            id=tool_call.id,
+        # Clean up litellm's addition of a __thought__ suffix to tool_call.id for
+        # Gemini models. See: https://github.com/BerriAI/litellm/pull/16895
+        # The suffix is redundant since thought_signature is available from
+        # provider_specific_fields, and it causes validation errors when tool
+        # calls are passed on to other models.
+        tool_call_id = tool_call.id
+        if model and "gemini" in model.lower() and "__thought__" in tool_call_id:
+            tool_call_id = tool_call_id.split("__thought__")[0]
+
+        # Convert litellm's tool call format to the chat completion message format
+        base_tool_call = ChatCompletionMessageFunctionToolCall(
+            id=tool_call_id,
             type="function",
             function=Function(
                 name=tool_call.function.name or "",
                 arguments=tool_call.function.arguments,
             ),
         )
+
+        # Preserve provider-specific fields if present (e.g., Gemini thought signatures)
+        if hasattr(tool_call, "provider_specific_fields") and tool_call.provider_specific_fields:
+            # Convert to the nested extra_content structure
+            extra_content: dict[str, Any] = {}
+            provider_fields = tool_call.provider_specific_fields
+
+            # Check for thought_signature (Gemini-specific)
+            if model and "gemini" in model.lower():
+                if "thought_signature" in provider_fields:
+                    extra_content["google"] = {
+                        "thought_signature": provider_fields["thought_signature"]
+                    }
+
+            return InternalToolCall(
+                **base_tool_call.model_dump(),
+                extra_content=extra_content if extra_content else None,
+            )
+
+        return base_tool_call
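
Finally, a sketch of the Gemini path through `convert_tool_call_to_openai` (values invented; it assumes litellm's `ChatCompletionMessageToolCall` accepts a `function` dict and allows setting `provider_specific_fields`, which may differ by litellm version):

```python
import litellm

raw = litellm.types.utils.ChatCompletionMessageToolCall(
    id="call_123__thought__sig",  # litellm-appended suffix, invented value
    type="function",
    function={"name": "get_weather", "arguments": "{}"},
)
raw.provider_specific_fields = {"thought_signature": "abc123"}  # invented signature

converted = LitellmConverter.convert_tool_call_to_openai(raw, model="gemini/gemini-2.5-pro")

assert converted.id == "call_123"  # __thought__ suffix stripped
assert isinstance(converted, InternalToolCall)
assert converted.extra_content == {"google": {"thought_signature": "abc123"}}
```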