Delay llm calls #222

Draft · wants to merge 4 commits into main
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -35,8 +35,7 @@ dependencies = [
     "google-genai",
     "opentelemetry-instrumentation-google-genai>=0.2b0",
     "tensorzero>=2025.4.7",
-    "google-genai",
-    "opentelemetry-instrumentation-google-genai>=0.2b0",
+    "deprecated"
 ]

 [project.optional-dependencies]
2 changes: 2 additions & 0 deletions src/mcp_agent/agents/base_agent.py
@@ -106,6 +106,8 @@ def __init__(
         # Initialize the LLM to None (will be set by attach_llm)
         self._llm: Optional[AugmentedLLMProtocol] = None

+        self._last_call_timestamp: float | None = None
+
         # Map function names to tools
         self._function_tool_map: Dict[str, Any] = {}
8 changes: 8 additions & 0 deletions src/mcp_agent/core/request_params.py
@@ -52,3 +52,11 @@ class RequestParams(CreateMessageRequestParams):
     """
     Optional dictionary of template variables for dynamic templates. Currently only works for TensorZero inference backend
     """
+
+    delay_between_calls: float | None = None
+    """
+    Optional delay between LLM calls, in seconds. Useful for rate limiting, and for working with tool calls that have delayed effects.
+
+    Examples where this helps are tools with asynchronous effects, such as sending emails or driving a web browser: a browser
+    tool may report completion before all Ajax requests have finished, causing problems if the LLM continues processing too quickly.
+    """
27 changes: 26 additions & 1 deletion src/mcp_agent/llm/augmented_llm.py
@@ -1,3 +1,5 @@
+import asyncio
+import time
 from abc import abstractmethod
 from typing import (
     TYPE_CHECKING,
@@ -158,6 +160,8 @@ def __init__(
         # Initialize default parameters
         self.default_request_params = self._initialize_default_params(kwargs)

+        self._last_call_timestamp: float = 0.0
+
         # Apply model override if provided
         if model:
             self.default_request_params.model = model
@@ -171,6 +175,7 @@ def __init__(
         self.type_converter = type_converter
         self.verb = kwargs.get("verb")

+
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize default parameters for the LLM.
         Should be overridden by provider implementations to set provider-specific defaults."""
@@ -195,6 +200,9 @@ async def generate(
         # We never expect this for structured() calls - this is for interactive use - developers
         # can do this programatically
         # TODO -- create a "fast-agent" control role rather than magic strings
+
+        final_params = self.get_request_params(request_params)
+        await self._apply_delay(final_params)

         if multipart_messages[-1].first_text().startswith("***SAVE_HISTORY"):
             parts: list[str] = multipart_messages[-1].first_text().split(" ", 1)
@@ -246,9 +254,12 @@ async def structured(
     ) -> Tuple[ModelT | None, PromptMessageMultipart]:
         """Return a structured response from the LLM using the provided messages."""

+        final_params = self.get_request_params(request_params)
+        await self._apply_delay(final_params)
+
         self._precall(multipart_messages)
         result, assistant_response = await self._apply_prompt_provider_specific_structured(
-            multipart_messages, model, request_params
+            multipart_messages, model, final_params
         )

         self._message_history.append(assistant_response)
@@ -337,6 +348,20 @@ def _precall(self, multipart_messages: List[PromptMessageMultipart]) -> None:
             chat_turn=self.chat_turn(),
         )

+    async def _apply_delay(self, request_params: RequestParams) -> None:
+        """Checks and applies a delay if configured in request_params."""
+        if request_params.delay_between_calls and self._last_call_timestamp > 0:
+            required_delay = request_params.delay_between_calls
+            time_since_last_call = time.monotonic() - self._last_call_timestamp
+
+            if time_since_last_call < required_delay:
+                wait_time = required_delay - time_since_last_call
+                self.logger.debug(f"Applying delay: waiting for {wait_time:.2f} seconds.")
+                await asyncio.sleep(wait_time)
+
+        # Always update the timestamp for the next call
+        self._last_call_timestamp = time.monotonic()
+
     def chat_turn(self) -> int:
         """Return the current chat turn number"""
         return 1 + sum(1 for message in self._message_history if message.role == "assistant")
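
To make the timing behavior concrete, here is a self-contained sketch of the same monotonic-clock throttle that `_apply_delay` implements. The `Throttle` class and its names are illustrative, not part of the PR:

```python
import asyncio
import time


class Throttle:
    """Enforce a minimum interval between calls, mirroring _apply_delay."""

    def __init__(self, min_interval: float) -> None:
        self.min_interval = min_interval
        self._last = 0.0  # 0.0 means "no call made yet", matching the diff

    async def wait(self) -> None:
        # Sleep only if a previous call exists and happened too recently.
        if self._last > 0:
            elapsed = time.monotonic() - self._last
            if elapsed < self.min_interval:
                await asyncio.sleep(self.min_interval - elapsed)
        # Always record this call's time for the next check.
        self._last = time.monotonic()


async def main() -> None:
    throttle = Throttle(min_interval=1.0)
    for i in range(3):
        await throttle.wait()  # first call passes immediately; later calls sleep
        print(f"call {i} at t={time.monotonic():.2f}")


if __name__ == "__main__":
    asyncio.run(main())
```

`time.monotonic()` is the right clock here: unlike `time.time()`, it cannot jump with system clock adjustments, so the enforced interval stays accurate.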
4 changes: 4 additions & 0 deletions src/mcp_agent/llm/providers/google_converter.py
@@ -166,6 +166,10 @@ def convert_from_google_content(
         fast_agent_parts: List[
             TextContent | ImageContent | EmbeddedResource | CallToolRequestParams
         ] = []
+
+        if content is None or not hasattr(content, "parts") or content.parts is None:
+            return []  # Google response content can be None or lack parts; nothing to extract.
+
         for part in content.parts:
             if part.text:
                 fast_agent_parts.append(TextContent(type="text", text=part.text))
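
This guard covers responses where the Google API returns a candidate whose `content` is `None` or carries no `parts`, which can happen for empty or blocked responses. A self-contained illustration of the same defensive pattern; `extract_texts` and the `SimpleNamespace` stand-ins are hypothetical, not the library's types:

```python
from types import SimpleNamespace


def extract_texts(content) -> list[str]:
    # Same three-way check as the diff: a missing object, a missing
    # attribute, or parts set to None all yield an empty result.
    if content is None or not hasattr(content, "parts") or content.parts is None:
        return []
    return [part.text for part in content.parts if part.text]


print(extract_texts(None))                                                 # []
print(extract_texts(SimpleNamespace(parts=None)))                          # []
print(extract_texts(SimpleNamespace(parts=[SimpleNamespace(text="hi")])))  # ['hi']
```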