diff --git a/kiro/converters_core.py b/kiro/converters_core.py
index 21cf758b..321354f4 100644
--- a/kiro/converters_core.py
+++ b/kiro/converters_core.py
@@ -400,6 +400,14 @@ def sanitize_json_schema(schema: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         if key == "additionalProperties":
             continue
 
+        # Skip $schema - the Kiro API doesn't support it
+        if key == "$schema":
+            continue
+
+        # Skip 'properties' when empty - the Kiro API rejects empty property maps
+        if key == "properties" and isinstance(value, dict) and len(value) == 0:
+            continue
+
         # Recursively process nested objects
         if key == "properties" and isinstance(value, dict):
             result[key] = {
diff --git a/kiro/converters_openai.py b/kiro/converters_openai.py
index aad3b83c..1855f3a5 100644
--- a/kiro/converters_openai.py
+++ b/kiro/converters_openai.py
@@ -35,7 +35,7 @@
 from kiro.config import HIDDEN_MODELS
 from kiro.model_resolver import get_model_id_for_kiro
-from kiro.models_openai import ChatMessage, ChatCompletionRequest, Tool
+from kiro.models_openai import ChatMessage, ChatCompletionRequest, ResponseRequest, Tool
 
 # Import from core - reuse shared logic
 from kiro.converters_core import (
@@ -279,10 +279,12 @@ def convert_openai_tools_to_unified(tools: Optional[List[Tool]]) -> Optional[Lis
             ))
         # Flat format compatibility (Cursor-style)
         elif tool.name is not None:
+            # Use input_schema, or fall back to an extra 'parameters' field (n8n-style)
+            schema = tool.input_schema or (tool.model_extra or {}).get("parameters")
             unified_tools.append(UnifiedTool(
                 name=tool.name,
                 description=tool.description,
-                input_schema=tool.input_schema
+                input_schema=schema
             ))
         # Skip invalid tools
         else:
@@ -345,4 +347,50 @@ def build_kiro_payload(
         inject_thinking=True
     )
 
+    return result.payload
+
+
+def build_kiro_payload_for_response(
+    request_data: ResponseRequest,
+    conversation_id: str,
+    profile_arn: str
+) -> dict:
+    """
+    Builds the complete payload for the Kiro API from an OpenAI ResponseRequest.
+
+    Similar to build_kiro_payload, but reads 'input' instead of 'messages'.
+
+    Args:
+        request_data: Request in OpenAI ResponseRequest format
+        conversation_id: Unique conversation ID
+        profile_arn: AWS CodeWhisperer profile ARN
+
+    Returns:
+        Payload dictionary for the POST request to the Kiro API
+    """
+    # Convert 'input' (instead of 'messages') to the unified format
+    system_prompt, unified_messages = convert_openai_messages_to_unified(request_data.input)
+
+    # Convert tools to the unified format
+    unified_tools = convert_openai_tools_to_unified(request_data.tools)
+
+    # Get the model ID for the Kiro API
+    model_id = get_model_id_for_kiro(request_data.model, HIDDEN_MODELS)
+
+    logger.debug(
+        f"Converting OpenAI ResponseRequest: model={request_data.model} -> {model_id}, "
+        f"messages={len(unified_messages)}, tools={len(unified_tools) if unified_tools else 0}"
+    )
+
+    # Use the core function to build the payload
+    result = core_build_kiro_payload(
+        messages=unified_messages,
+        system_prompt=system_prompt,
+        model_id=model_id,
+        tools=unified_tools,
+        conversation_id=conversation_id,
+        profile_arn=profile_arn,
+        inject_thinking=True
+    )
+    return result.payload
\ No newline at end of file
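
The flat-tool fallback is easiest to see in isolation. Below is a minimal, self-contained sketch, assuming Pydantic v2 (where `model_extra` exposes undeclared fields when `extra` is `"allow"`); the `Tool` stub here is a simplified stand-in for the real model in kiro/models_openai.py:

    # Standalone sketch: an n8n-style flat tool carries its schema in an
    # extra 'parameters' field instead of 'input_schema'.
    from typing import Any, Dict, Optional
    from pydantic import BaseModel

    class Tool(BaseModel):  # simplified stand-in, not the real model
        name: Optional[str] = None
        description: Optional[str] = None
        input_schema: Optional[Dict[str, Any]] = None
        model_config = {"extra": "allow"}

    tool = Tool(name="get_weather", parameters={
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {"city": {"type": "string"}},
    })
    # The fallback from the diff recovers the schema from model_extra
    schema = tool.input_schema or (tool.model_extra or {}).get("parameters")
    assert schema["properties"]["city"]["type"] == "string"
    # sanitize_json_schema() would then strip the "$schema" key before the
    # payload reaches Kiro.
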
diff --git a/kiro/models_openai.py b/kiro/models_openai.py
index 46167c1b..7235447c 100644
--- a/kiro/models_openai.py
+++ b/kiro/models_openai.py
@@ -180,6 +180,41 @@ class ChatCompletionRequest(BaseModel):
     model_config = {"extra": "allow"}
 
 
+class ResponseRequest(BaseModel):
+    """
+    Request for response generation in OpenAI Responses API format.
+
+    Similar to ChatCompletionRequest, but uses 'input' instead of 'messages'.
+
+    Attributes:
+        model: Model ID for generation
+        input: List of input messages
+        instructions: System instructions (a string, per the Responses API; a block list is also accepted)
+        tools: List of available tools
+        temperature: Generation temperature (0-2)
+        top_p: Top-p sampling
+        max_output_tokens: Maximum number of output tokens
+        truncation: Truncation strategy
+        n: Number of response variants
+        stop: Stop sequences
+        parallel_tool_calls: Whether to allow parallel tool calls
+    """
+    model: str
+    input: Annotated[List[ChatMessage], Field(min_length=1)]
+    instructions: Optional[Union[str, List[Dict[str, Any]]]] = None
+    tools: Optional[List[Tool]] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    max_output_tokens: Optional[int] = None
+    truncation: Optional[str] = None
+    n: Optional[int] = 1
+    stop: Optional[Union[str, List[str]]] = None
+    parallel_tool_calls: Optional[bool] = None
+    stream: Optional[bool] = False
+
+    model_config = {"extra": "allow"}
+
+
 # ==================================================================================================
 # Models for responses
 # ==================================================================================================
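
For reference, a request the new model should validate end to end. This is a hypothetical example: the model name is illustrative, and `ChatMessage` is assumed to accept the usual role/content shape:

    from kiro.models_openai import ResponseRequest

    req = ResponseRequest.model_validate({
        "model": "claude-sonnet-4",                 # illustrative model name
        "input": [{"role": "user", "content": "Say hello."}],
        "instructions": "Answer in one sentence.",  # string form, as OpenAI clients send it
        "max_output_tokens": 128,
        "stream": False,
    })
    assert req.input[0].role == "user"
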
diff --git a/kiro/routes_openai.py b/kiro/routes_openai.py
index 301ae9b5..00c788b3 100644
--- a/kiro/routes_openai.py
+++ b/kiro/routes_openai.py
@@ -24,6 +24,7 @@
 - / and /health: Health check
 - /v1/models: Models list
 - /v1/chat/completions: Chat completions
+- /v1/responses: Responses API (OpenAI-compatible)
 """
 
 import json
@@ -42,11 +43,12 @@
     OpenAIModel,
     ModelList,
     ChatCompletionRequest,
+    ResponseRequest,
 )
 from kiro.auth import KiroAuthManager, AuthType
 from kiro.cache import ModelInfoCache
 from kiro.model_resolver import ModelResolver
-from kiro.converters_openai import build_kiro_payload
+from kiro.converters_openai import build_kiro_payload, build_kiro_payload_for_response
 from kiro.streaming_openai import stream_kiro_to_openai, collect_stream_response, stream_with_first_token_retry
 from kiro.http_client import KiroHttpClient
 from kiro.utils import generate_conversation_id
@@ -150,6 +152,186 @@ async def get_models(request: Request):
     return ModelList(data=openai_models)
 
 
+@router.post("/v1/responses", dependencies=[Depends(verify_api_key)])
+async def responses(request: Request, request_data: ResponseRequest):
+    """
+    Responses endpoint - compatible with the OpenAI API.
+
+    Accepts requests in OpenAI Responses API format (with 'input' instead of 'messages')
+    and translates them to the Kiro API.
+
+    Args:
+        request: FastAPI Request for accessing app.state
+        request_data: Request in OpenAI ResponseRequest format
+
+    Returns:
+        JSONResponse in Responses API format, or a StreamingResponse
+        with SSE events when stream=true
+    """
+    logger.info(f"Request to /v1/responses (model={request_data.model})")
+    logger.debug(f"Request body: {request_data.model_dump_json()}")
+    logger.debug(
+        f"Input messages: {len(request_data.input)}, has tools: {request_data.tools is not None}, "
+        f"temperature: {request_data.temperature}, stream: {request_data.stream}"
+    )
+
+    auth_manager: KiroAuthManager = request.app.state.auth_manager
+    model_cache: ModelInfoCache = request.app.state.model_cache
+
+    # Generate a conversation ID for the Kiro API
+    conversation_id = generate_conversation_id()
+
+    # Build the Kiro payload - uses 'input' instead of 'messages'
+    profile_arn_for_payload = ""
+    if auth_manager.auth_type == AuthType.KIRO_DESKTOP and auth_manager.profile_arn:
+        profile_arn_for_payload = auth_manager.profile_arn
+
+    try:
+        kiro_payload = build_kiro_payload_for_response(
+            request_data,
+            conversation_id,
+            profile_arn_for_payload
+        )
+        logger.debug(
+            f"Kiro payload built: {len(str(kiro_payload))} chars, "
+            f"model_id={kiro_payload.get('modelId', 'N/A')}, has tools: {'tools' in kiro_payload}"
+        )
+    except ValueError as e:
+        logger.error(f"Failed to build kiro payload: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+
+    url = f"{auth_manager.api_host}/generateAssistantResponse"
+    logger.debug(f"Kiro API URL: {url}")
+
+    # Create the HTTP client, reusing the app-wide shared client
+    shared_client = request.app.state.http_client
+    http_client = KiroHttpClient(auth_manager, shared_client=shared_client)
+
+    try:
+        response = await http_client.request_with_retry(
+            "POST",
+            url,
+            kiro_payload,
+            stream=True
+        )
+
+        logger.info(f"Kiro API response status: {response.status_code}")
+
+        if response.status_code != 200:
+            try:
+                error_content = await response.aread()
+            except Exception:
+                error_content = b"Unknown error"
+
+            await http_client.close()
+            error_text = error_content.decode('utf-8', errors='replace')
+
+            logger.error(f"Kiro API error response: {error_text[:500]}")
+
+            error_message = error_text
+            try:
+                error_json = json.loads(error_text)
+                from kiro.kiro_errors import enhance_kiro_error
+                error_info = enhance_kiro_error(error_json)
+                error_message = error_info.user_message
+                logger.debug(f"Original Kiro error: {error_info.original_message}")
+            except (json.JSONDecodeError, KeyError):
+                pass
+
+            logger.warning(
+                f"HTTP {response.status_code} - POST /v1/responses - {error_message[:100]}"
+            )
+
+            return JSONResponse(
+                status_code=response.status_code,
+                content={
+                    "error": {
+                        "message": error_message,
+                        "type": "kiro_api_error",
+                        "code": response.status_code
+                    }
+                }
+            )
+
+        # Collect the streamed response (the Kiro API streams even for non-streaming requests)
+        chat_completion = await collect_stream_response(
+            http_client.client,
+            response,
+            request_data.model,
+            model_cache,
+            auth_manager,
+            request_messages=[msg.model_dump() for msg in request_data.input],
+            request_tools=[t.model_dump() for t in request_data.tools] if request_data.tools else None
+        )
+
+        await http_client.close()
+        logger.info("HTTP 200 - POST /v1/responses - completed")
+
+        # Convert the ChatCompletion format to the Responses API format
+        output_text = chat_completion.get("choices", [{}])[0].get("message", {}).get("content", "")
+
+        if isinstance(output_text, str):
+            content_blocks = [{"type": "output_text", "text": output_text, "annotations": [], "logprobs": []}]
"annotations": [], "logprobs": []}] + else: + content_blocks = output_text if output_text else [{"type": "output_text", "text": "", "annotations": [], "logprobs": []}] + + response_id = chat_completion.get("id", conversation_id) + model_name = chat_completion.get("model", request_data.model) + created_at = int(datetime.now(timezone.utc).timestamp()) + usage = chat_completion.get("usage", {}) + + responses_response = { + "id": response_id, + "object": "response", + "created_at": created_at, + "model": model_name, + "output": [ + { + "id": conversation_id, + "type": "message", + "role": "assistant", + "content": content_blocks if isinstance(content_blocks, list) else [], + "status": "completed" + } + ], + "usage": usage, + "status": "completed" + } + + # Check if streaming was requested + stream_requested = getattr(request_data, 'stream', False) + if stream_requested: + logger.info("Returning streaming SSE response") + async def sse_generator(): + # response.created + yield f"data: {json.dumps({'type': 'response.created', 'response': {**responses_response, 'output': [], 'status': 'in_progress'}})}\n\n" + # response.output_item.added + yield f"data: {json.dumps({'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': conversation_id, 'type': 'message', 'role': 'assistant', 'content': [], 'status': 'in_progress'}})}\n\n" + # response.output_text.delta + full_text = content_blocks[0].get("text", "") if content_blocks else "" + yield f"data: {json.dumps({'type': 'response.output_text.delta', 'output_index': 0, 'content_index': 0, 'delta': full_text})}\n\n" + # response.output_text.done + yield f"data: {json.dumps({'type': 'response.output_text.done', 'output_index': 0, 'content_index': 0, 'text': full_text})}\n\n" + # response.output_item.done + yield f"data: {json.dumps({'type': 'response.output_item.done', 'output_index': 0, 'item': responses_response['output'][0]})}\n\n" + # response.completed + yield f"data: {json.dumps({'type': 'response.completed', 'response': responses_response})}\n\n" + yield "data: [DONE]\n\n" + return StreamingResponse(sse_generator(), media_type="text/event-stream") + + logger.info(f"Responses API response: {json.dumps(responses_response, indent=2)}") + logger.info("Returning JSONResponse with Responses API format") + return JSONResponse(content=responses_response) + + except HTTPException as e: + await http_client.close() + logger.error(f"HTTP {e.status_code} - POST /v1/responses - {e.detail}") + raise + except Exception as e: + await http_client.close() + logger.error(f"Internal error: {e}", exc_info=True) + logger.error(f"HTTP 500 - POST /v1/responses - {str(e)[:100]}") + raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}") + + @router.post("/v1/chat/completions", dependencies=[Depends(verify_api_key)]) async def chat_completions(request: Request, request_data: ChatCompletionRequest): """