Commit c272e41

Merge branch 'master' into ROB-858-holmes-telemetry
2 parents b82f06f + bed9d1f commit c272e41

2 files changed: 12 additions & 10 deletions


holmes/core/llm.py

Lines changed: 10 additions & 7 deletions
@@ -6,7 +6,6 @@
 import sentry_sdk

 from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
-from holmes.core.tools import Tool
 from pydantic import BaseModel
 import litellm
 import os
@@ -45,13 +44,13 @@ def count_tokens_for_message(self, messages: list[dict]) -> int:
     def completion(
         self,
         messages: List[Dict[str, Any]],
-        tools: Optional[List[Tool]] = [],
+        tools: Optional[List[Dict[str, Any]]] = [],
         tool_choice: Optional[Union[str, dict]] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         temperature: Optional[float] = None,
         drop_params: Optional[bool] = None,
         stream: Optional[bool] = None,
-    ) -> ModelResponse:
+    ) -> Union[ModelResponse, CustomStreamWrapper]:
         pass


@@ -167,24 +166,28 @@ def count_tokens_for_message(self, messages: list[dict]) -> int:
     def completion(
         self,
         messages: List[Dict[str, Any]],
-        tools: Optional[List[Tool]] = [],
+        tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, dict]] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         temperature: Optional[float] = None,
         drop_params: Optional[bool] = None,
         stream: Optional[bool] = None,
-    ) -> ModelResponse:
+    ) -> Union[ModelResponse, CustomStreamWrapper]:
+        tools_args = {}
+        if tools and tool_choice:
+            tools_args["tools"] = tools
+            tools_args["tool_choice"] = tool_choice
+
         result = litellm.completion(
             model=self.model,
             api_key=self.api_key,
             messages=messages,
-            tools=tools,
-            tool_choice=tool_choice,
             base_url=self.base_url,
             temperature=temperature,
             response_format=response_format,
             drop_params=drop_params,
             stream=stream,
+            **tools_args,
         )

         if isinstance(result, ModelResponse):
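
Note: the sketch below is not code from this repository; call_model and read_text are illustrative names. It shows the two patterns the hunk above relies on: forwarding tools/tool_choice to litellm.completion only when both are set (so an empty tool list is never sent), and handling the widened ModelResponse-or-CustomStreamWrapper return type.

from typing import Any, Dict, List, Optional, Union

import litellm
from litellm import ModelResponse
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper


def call_model(
    model: str,
    messages: List[Dict[str, Any]],
    tools: Optional[List[Dict[str, Any]]] = None,
    tool_choice: Optional[Union[str, dict]] = None,
    stream: bool = False,
) -> Union[ModelResponse, CustomStreamWrapper]:
    # Mirror the tools_args gating above: forward tools/tool_choice only when
    # both are present, so litellm never receives tools=[] or a dangling
    # tool_choice.
    tools_args: Dict[str, Any] = {}
    if tools and tool_choice:
        tools_args["tools"] = tools
        tools_args["tool_choice"] = tool_choice
    return litellm.completion(model=model, messages=messages, stream=stream, **tools_args)


def read_text(result: Union[ModelResponse, CustomStreamWrapper]) -> str:
    # Non-streaming calls return a complete ModelResponse.
    if isinstance(result, ModelResponse):
        return result.choices[0].message.content or ""
    # stream=True returns a CustomStreamWrapper that yields delta chunks.
    return "".join(chunk.choices[0].delta.content or "" for chunk in result)

Spreading the optional arguments via **tools_args, rather than always passing literal keyword arguments, is what lets providers that reject an empty tools array keep working.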

holmes/core/tool_calling_llm.py

Lines changed: 2 additions & 3 deletions
@@ -19,7 +19,6 @@
 from holmes.plugins.prompts import load_and_render_prompt
 from holmes.core.llm import LLM
 from openai import BadRequestError
-from openai._types import NOT_GIVEN
 from openai.types.chat.chat_completion_message_tool_call import (
     ChatCompletionMessageToolCall,
 )
@@ -136,8 +135,8 @@ def call(
             perf_timing.measure(f"start iteration {i}")
             logging.debug(f"running iteration {i}")
             # on the last step we don't allow tools - we want to force a reply, not a request to run another tool
-            tools = NOT_GIVEN if i == max_steps - 1 else tools
-            tool_choice = None if tools == NOT_GIVEN else "auto"
+            tools = None if i == max_steps else tools
+            tool_choice = "auto" if tools else None

             total_tokens = self.llm.count_tokens_for_message(messages)
             max_context_size = self.llm.get_context_window_size()
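
Note: the helper below is a sketch, not code from this repository; tools_for_step is a hypothetical name, and it assumes the iteration counter i runs from 1 to max_steps, as the new i == max_steps check suggests. It reproduces the gating above with plain None in place of openai's NOT_GIVEN sentinel: tools are withheld on the final step so the model must answer instead of requesting another tool call.

from typing import Any, Dict, List, Optional, Tuple


def tools_for_step(
    i: int,
    max_steps: int,
    tools: Optional[List[Dict[str, Any]]],
) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
    # Withhold tools on the last step to force a direct reply instead of
    # another tool call; otherwise offer the tools with automatic selection.
    step_tools = None if i == max_steps else tools
    step_tool_choice = "auto" if step_tools else None
    return step_tools, step_tool_choice


# Example: with max_steps = 3 and one tool defined, steps 1 and 2 offer the
# tool with tool_choice="auto"; step 3 passes tools=None so no tool call is
# possible. The tool definition here is a placeholder.
if __name__ == "__main__":
    demo_tools = [{"type": "function", "function": {"name": "kubectl_get", "parameters": {}}}]
    for step in range(1, 4):
        print(step, tools_for_step(step, 3, demo_tools))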
