From b54df078668efb36aecdf4f3a9f0fab7f3de31e5 Mon Sep 17 00:00:00 2001 From: Tom X Nguyen Date: Thu, 5 Jun 2025 18:50:57 +0700 Subject: [PATCH 1/2] fix: potential NoneType return from content.parts --- .../llm/providers/google_converter.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/mcp_agent/llm/providers/google_converter.py b/src/mcp_agent/llm/providers/google_converter.py index ce5a4581..7a6fe78d 100644 --- a/src/mcp_agent/llm/providers/google_converter.py +++ b/src/mcp_agent/llm/providers/google_converter.py @@ -166,16 +166,17 @@ def convert_from_google_content( fast_agent_parts: List[ TextContent | ImageContent | EmbeddedResource | CallToolRequestParams ] = [] - for part in content.parts: - if part.text: - fast_agent_parts.append(TextContent(type="text", text=part.text)) - elif part.function_call: - fast_agent_parts.append( - CallToolRequestParams( - name=part.function_call.name, - arguments=part.function_call.args, + if content.parts: + for part in content.parts: + if part.text: + fast_agent_parts.append(TextContent(type="text", text=part.text)) + elif part.function_call: + fast_agent_parts.append( + CallToolRequestParams( + name=part.function_call.name, + arguments=part.function_call.args, + ) ) - ) return fast_agent_parts def convert_from_google_function_call( From 16b078be9baa08d05a34ad598c4df42ecf2d537b Mon Sep 17 00:00:00 2001 From: Tom X Nguyen Date: Thu, 5 Jun 2025 19:53:49 +0700 Subject: [PATCH 2/2] fix: Resolve errors in evaluator optimizer pattern --- .../providers/augmented_llm_google_native.py | 15 +- .../llm/providers/google_converter.py | 244 ++++++++++++++---- 2 files changed, 209 insertions(+), 50 deletions(-) diff --git a/src/mcp_agent/llm/providers/augmented_llm_google_native.py b/src/mcp_agent/llm/providers/augmented_llm_google_native.py index 1a7d4b10..c97caa57 100644 --- a/src/mcp_agent/llm/providers/augmented_llm_google_native.py +++ b/src/mcp_agent/llm/providers/augmented_llm_google_native.py @@ -74,9 +74,20 @@ def _get_schema_type(model): return None # Use the schema as a dict or as a type, as Gemini supports both - response_schema = _get_schema_type(model) + # response_schema = _get_schema_type(model) # Original line if schema is not None: - response_schema = schema + # Convert the Pydantic JSON schema to a Google Schema object + response_schema = self._converter.json_schema_to_google_schema( + json_schema_node=schema, root_schema=schema + ) + else: + # Fallback or handle error if schema is None + # For now, we'll let it proceed, but Google API might error if response_schema is not set + # for structured output. Or, we could try to infer from the model type. + self.logger.warning( + "Pydantic model schema could not be generated. Trying to infer from model type." + ) + response_schema = _get_schema_type(model) # Set config for structured output generate_content_config = self._converter.convert_request_params_to_google_config( diff --git a/src/mcp_agent/llm/providers/google_converter.py b/src/mcp_agent/llm/providers/google_converter.py index 7a6fe78d..bd6ea934 100644 --- a/src/mcp_agent/llm/providers/google_converter.py +++ b/src/mcp_agent/llm/providers/google_converter.py @@ -1,5 +1,5 @@ import base64 -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Tuple, Union # Import necessary types from google.genai from google.genai import types @@ -41,19 +41,22 @@ def _clean_schema_for_google(self, schema: Dict[str, Any]) -> Dict[str, Any]: "$schema", "exclusiveMaximum", "exclusiveMinimum", + # "title" is generally supported or ignored by Google's Schema, keep it for description if needed } - supported_string_formats = {"enum", "date-time"} + # Only specific string formats are directly supported or need special handling by Google. + # Others might be removed if they cause issues. For now, keeping most common. + # Pydantic might generate "format": "date-time" which is fine. + # "enum" itself is not a format, it's a keyword at the same level as type, description. + # The previous 'supported_string_formats = {"enum", "date-time"}' was a bit misleading. + # 'format' is a keyword. 'enum' is a separate keyword. for key, value in schema.items(): if key in unsupported_keys: - continue # Skip this key + continue - if ( - key == "format" - and schema.get("type") == "string" - and value not in supported_string_formats - ): - continue # Remove unsupported string formats + # Example: if Google's schema validation is strict about unknown 'format' values for strings: + # if key == "format" and schema.get("type") == "string" and value not in {"date-time", ... /* other supported formats */}: + # continue if isinstance(value, dict): cleaned_schema[key] = self._clean_schema_for_google(value) @@ -71,12 +74,12 @@ def convert_to_google_content( ) -> List[types.Content]: """ Converts a list of fast-agent PromptMessageMultipart to google.genai types.Content. - Handles different roles and content types (text, images, etc.). + Handles different roles and content types (text, images, PDF resources, and other generic resources). """ google_contents: List[types.Content] = [] for message in messages: parts: List[types.Part] = [] - for part_content in message.content: # renamed part to part_content to avoid conflict + for part_content in message.content: if is_text_content(part_content): parts.append(types.Part.from_text(text=get_text(part_content) or "")) elif is_image_content(part_content): @@ -100,11 +103,9 @@ def convert_to_google_content( ) ) else: - # Check if the resource itself has text content resource_text = None - if hasattr(part_content.resource, "text"): # Direct text attribute + if hasattr(part_content.resource, "text"): resource_text = part_content.resource.text - # Example: if EmbeddedResource wraps a TextContent-like object in its 'resource' field elif ( hasattr(part_content.resource, "type") and part_content.resource.type == "text" @@ -115,7 +116,6 @@ def convert_to_google_content( if resource_text is not None: parts.append(types.Part.from_text(text=resource_text)) else: - # Fallback for other binary types or types without direct text uri_str = ( part_content.resource.uri if hasattr(part_content.resource, "uri") @@ -131,7 +131,6 @@ def convert_to_google_content( text=f"[Resource: {uri_str}, MIME: {mime_str}]" ) ) - if parts: google_role = ( "user" @@ -144,24 +143,162 @@ def convert_to_google_content( def convert_to_google_tools(self, tools: List[ToolDefinition]) -> List[types.Tool]: """ Converts a list of fast-agent ToolDefinition to google.genai types.Tool. + The input schema for each tool is converted to Google's format. """ google_tools: List[types.Tool] = [] for tool in tools: - cleaned_input_schema = self._clean_schema_for_google(tool.inputSchema) + # For tool parameters, the inputSchema itself is the root for $refs. + google_params_schema = self.json_schema_to_google_schema( + tool.inputSchema, root_schema=tool.inputSchema + ) + function_declaration = types.FunctionDeclaration( name=tool.name, description=tool.description if tool.description else "", - parameters=types.Schema(**cleaned_input_schema), + parameters=google_params_schema, ) google_tools.append(types.Tool(function_declarations=[function_declaration])) return google_tools + def _json_type_to_google_type(self, effective_json_type: str) -> types.Type: + """Maps an effective JSON schema type (string) to google.generativeai.types.Type.""" + if effective_json_type == "string": + return types.Type.STRING + elif effective_json_type == "number": + return types.Type.NUMBER + elif effective_json_type == "integer": + return types.Type.INTEGER + elif effective_json_type == "boolean": + return types.Type.BOOLEAN + elif effective_json_type == "array": + return types.Type.ARRAY + elif effective_json_type == "object": + return types.Type.OBJECT + else: + # Fallback for any other unexpected string type not caught by inference. + # This case should ideally not be reached if inference is robust. + return ( + types.Type.STRING + ) # Defaulting to STRING might be safer than OBJECT if type is truly unknown. + # Or raise ValueError(f"Unsupported effective JSON type: {effective_json_type}") + + def _resolve_ref(self, ref: str, root_schema: Dict[str, Any]) -> Dict[str, Any]: + """ + Resolves a local JSON schema reference (e.g., '#/definitions/MyModel'). + Only supports references within the same schema document (root_schema). + """ + if not ref.startswith("#/"): + raise ValueError( + f"Unsupported reference format: {ref}. Only local references starting with '#/' are supported." + ) + + path_parts = ref[2:].split("/") + current_node = root_schema + for part in path_parts: + if isinstance(current_node, dict) and part in current_node: + current_node = current_node[part] + else: + raise ValueError( + f"Reference '{ref}' not found in schema. Path part '{part}' is invalid in current node {current_node}." + ) + if not isinstance(current_node, dict): + raise ValueError( + f"Reference '{ref}' did not resolve to a schema object (dict). Found: {type(current_node)}" + ) + return current_node + + def json_schema_to_google_schema( + self, json_schema_node: Dict[str, Any], root_schema: Dict[str, Any] + ) -> types.Schema: + """ + Recursively converts a JSON schema node (potentially with $refs) to a google.genai.types.Schema object. + Handles type mapping, descriptions, nullability, enums, object properties, and array items. + $refs are resolved against the root_schema. + Unsupported JSON schema keywords are cleaned via _clean_schema_for_google. + """ + current_processing_node = json_schema_node + if "$ref" in json_schema_node: + current_processing_node = self._resolve_ref(json_schema_node["$ref"], root_schema) + # After resolving, we continue processing this resolved node. + + # Clean the node that we are actually processing (either original or resolved by $ref) + cleaned_node = self._clean_schema_for_google(current_processing_node) + + original_node_type = cleaned_node.get( + "type" + ) # Type from the (resolved and cleaned) schema node + enum_values = cleaned_node.get("enum") + effective_json_type_str: str + + if original_node_type is None: + if enum_values and all( + isinstance(e, str) for e in enum_values + ): # Infer type string for string enums + effective_json_type_str = "string" + else: + # Default to object if type is None and not a clear string enum (e.g. for schemas like {}). + effective_json_type_str = "object" + elif isinstance(original_node_type, list): + # Handles nullable types like ["string", "null"]. Pick first non-null type. + effective_json_type_str = next((t for t in original_node_type if t != "null"), "object") + else: # type is a single string + effective_json_type_str = original_node_type + + google_type_enum = self._json_type_to_google_type(effective_json_type_str) + + description = cleaned_node.get("description") or cleaned_node.get( + "title" + ) # Use title as fallback for description + + is_nullable = False + if isinstance(original_node_type, list) and "null" in original_node_type: + is_nullable = True + + final_enum_values = cleaned_node.get("enum") + if google_type_enum == types.Type.STRING and final_enum_values: + final_enum_values = [ + str(val) for val in final_enum_values + ] # Ensure string enums are strings + elif google_type_enum != types.Type.STRING and final_enum_values: + # Non-string enums are not directly passed as Google's Schema enum currently expects strings. + final_enum_values = None + + properties_map: Union[Dict[str, types.Schema], None] = None + items_schema: Union[types.Schema, None] = None + + if google_type_enum == types.Type.OBJECT and "properties" in cleaned_node: + properties_map = { + key: self.json_schema_to_google_schema(prop_schema, root_schema) + for key, prop_schema in cleaned_node["properties"].items() + } + # If properties_map becomes an empty dict (e.g. "properties": {}), it's passed as such. + # types.Schema allows this. + + if google_type_enum == types.Type.ARRAY and "items" in cleaned_node: + items_def = cleaned_node.get("items") + if isinstance(items_def, dict): # "items" must be a schema object + items_schema = self.json_schema_to_google_schema(items_def, root_schema) + # If "items" is not a dict, items_schema remains None. + + required_fields = cleaned_node.get("required") + + return types.Schema( + type=google_type_enum, + description=description, + nullable=is_nullable, + enum=final_enum_values, + properties=properties_map, # Pass properties_map (can be dict or None) + items=items_schema, + required=required_fields, + ) + def convert_from_google_content( self, content: types.Content ) -> List[TextContent | ImageContent | EmbeddedResource | CallToolRequestParams]: """ Converts google.genai types.Content from a model response to a list of - fast-agent content types or tool call requests. + fast-agent content types (TextContent, ImageContent, EmbeddedResource) + or CallToolRequestParams if a function call is present. """ fast_agent_parts: List[ TextContent | ImageContent | EmbeddedResource | CallToolRequestParams @@ -198,17 +335,18 @@ def convert_function_results_to_google( ) -> List[types.Content]: """ Converts a list of fast-agent tool results to google.genai types.Content - with role 'tool'. Handles multimodal content in tool results. + with role 'tool'. Handles multimodal content (text, images, PDFs) in tool results, + packaging them appropriately for Google's API. """ google_tool_response_contents: List[types.Content] = [] for tool_name, tool_result in tool_results: current_content_parts: List[types.Part] = [] textual_outputs: List[str] = [] - media_parts: List[types.Part] = [] + media_parts: List[types.Part] = [] # For images, PDFs etc. for item in tool_result.content: if is_text_content(item): - textual_outputs.append(get_text(item) or "") # Ensure no None is added + textual_outputs.append(get_text(item) or "") elif is_image_content(item): assert isinstance(item, ImageContent) try: @@ -220,7 +358,7 @@ def convert_function_results_to_google( textual_outputs.append(f"[Error processing image from tool result: {e}]") elif is_resource_content(item): assert isinstance(item, EmbeddedResource) - if ( + if ( # Handle PDF resources specifically "application/pdf" == item.resource.mimeType and hasattr(item.resource, "blob") and isinstance(item.resource, BlobResourceContents) @@ -235,12 +373,10 @@ def convert_function_results_to_google( ) except Exception as e: textual_outputs.append(f"[Error processing PDF from tool result: {e}]") - else: - # Check if the resource itself has text content + else: # Handle other generic resources or resources with text resource_text = None - if hasattr(item.resource, "text"): # Direct text attribute + if hasattr(item.resource, "text"): resource_text = item.resource.text - # Example: if EmbeddedResource wraps a TextContent-like object in its 'resource' field elif ( hasattr(item.resource, "type") and item.resource.type == "text" @@ -250,7 +386,7 @@ def convert_function_results_to_google( if resource_text is not None: textual_outputs.append(resource_text) - else: + else: # Fallback for unhandled resource types uri_str = ( item.resource.uri if hasattr(item.resource, "uri") @@ -264,27 +400,19 @@ def convert_function_results_to_google( textual_outputs.append( f"[Unhandled Resource in Tool: {uri_str}, MIME: {mime_str}]" ) - # Add handling for other content types if needed, for now they are skipped or become unhandled resource text function_response_payload: Dict[str, Any] = {"tool_name": tool_name} if textual_outputs: function_response_payload["text_content"] = "\n".join(textual_outputs) - # Only add media_parts if there are some, otherwise Gemini might error on empty parts for function response + # The main FunctionResponse part must contain the textual outputs. + # Media parts are added separately to the content parts list for the tool response. + fn_response_part = types.Part.from_function_response( + name=tool_name, response=function_response_payload + ) + current_content_parts.append(fn_response_part) if media_parts: - # Create the main FunctionResponse part - fn_response_part = types.Part.from_function_response( - name=tool_name, response=function_response_payload - ) - current_content_parts.append(fn_response_part) - current_content_parts.extend( - media_parts - ) # Add media parts after the main response part - else: # If no media parts, the textual output (if any) is the sole content of the function response - fn_response_part = types.Part.from_function_response( - name=tool_name, response=function_response_payload - ) - current_content_parts.append(fn_response_part) + current_content_parts.extend(media_parts) google_tool_response_contents.append( types.Content(role="tool", parts=current_content_parts) @@ -296,6 +424,8 @@ def convert_request_params_to_google_config( ) -> types.GenerateContentConfig: """ Converts fast-agent RequestParams to google.genai types.GenerateContentConfig. + Maps parameters like temperature, maxTokens, topK, topP, stopSequences, + presence/frequency penalties, and systemPrompt. """ config_args: Dict[str, Any] = {} if request_params.temperature is not None: @@ -319,6 +449,7 @@ def convert_request_params_to_google_config( ): config_args["frequency_penalty"] = request_params.frequencyPenalty if request_params.systemPrompt is not None: + # Assuming systemPrompt maps to system_instruction for Google's API config_args["system_instruction"] = request_params.systemPrompt return types.GenerateContentConfig(**config_args) @@ -333,30 +464,47 @@ def convert_from_google_content_list( def _convert_from_google_content(self, content: types.Content) -> PromptMessageMultipart: """ Converts a single google.genai types.Content to a fast-agent PromptMessageMultipart. + Handles different Google content parts (text, function_response, file_data) and roles. + If the content is a model response with a function call, it's treated as an empty assistant message + as the function call itself is handled separately. """ if content.role == "model" and any(part.function_call for part in content.parts): + # Function calls are typically extracted and handled by CallToolRequestParams, + # so the main message content might be empty or represent precursor text. + # Here, we return an empty assistant message if a function_call is present in any part. return PromptMessageMultipart(role="assistant", content=[]) fast_agent_parts: List[ - TextContent | ImageContent | EmbeddedResource | CallToolRequestParams + TextContent + | ImageContent + | EmbeddedResource + | CallToolRequestParams # Though CallToolRequestParams won't be added here due to above check ] = [] for part in content.parts: if part.text: fast_agent_parts.append(TextContent(type="text", text=part.text)) elif part.function_response: - response_text = str(part.function_response.response) + response_data = part.function_response.response + if isinstance(response_data, dict) and "text_content" in response_data: + response_text = str(response_data["text_content"]) + else: + response_text = str( + response_data + ) # Fallback if response is not a dict or no "text_content" fast_agent_parts.append(TextContent(type="text", text=response_text)) - elif part.file_data: + elif part.file_data: # Convert file_data to a generic EmbeddedResource with TextContent fast_agent_parts.append( EmbeddedResource( type="resource", resource=TextContent( uri=part.file_data.file_uri, mimeType=part.file_data.mime_type, - text=f"[Resource: {part.file_data.file_uri}, MIME: {part.file_data.mime_type}]", + text=f"[Resource: {part.file_data.file_uri}, MIME: {part.file_data.mime_type}]", # Placeholder text ), ) ) - fast_agent_role = "user" if content.role == "user" else "assistant" + fast_agent_role = ( + "user" if content.role == "user" else "assistant" + ) # Default to assistant for "model" or "tool" roles not caught above return PromptMessageMultipart(role=fast_agent_role, content=fast_agent_parts)