8 changes: 8 additions & 0 deletions kiro/converters_core.py
@@ -400,6 +400,14 @@ def sanitize_json_schema(schema: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         if key == "additionalProperties":
             continue
 
+        # Skip $schema - Kiro API doesn't support it
+        if key == "$schema":
+            continue
+
+        # Skip properties if empty - Kiro API doesn't support empty properties
+        if key == "properties" and isinstance(value, dict) and len(value) == 0:
+            continue
+
         # Recursively process nested objects
         if key == "properties" and isinstance(value, dict):
             result[key] = {
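
To see the effect of the two new skips in isolation, here is a minimal runnable sketch of just that filtering logic. `sanitize_sketch` is a hypothetical stand-in; it assumes the surrounding loop in `sanitize_json_schema` copies all other keys through unchanged (the real function also recurses into nested `properties`):

```python
from typing import Any, Dict, Optional

def sanitize_sketch(schema: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Minimal re-implementation of only the skip logic shown in the hunk above."""
    result: Dict[str, Any] = {}
    for key, value in (schema or {}).items():
        if key == "additionalProperties":
            continue
        # New: drop the JSON Schema dialect marker the Kiro API rejects
        if key == "$schema":
            continue
        # New: drop an empty properties object the Kiro API rejects
        if key == "properties" and isinstance(value, dict) and len(value) == 0:
            continue
        result[key] = value
    return result

print(sanitize_sketch({
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {},
}))  # -> {'type': 'object'}
```
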
52 changes: 50 additions & 2 deletions kiro/converters_openai.py
@@ -35,7 +35,7 @@
 
 from kiro.config import HIDDEN_MODELS
 from kiro.model_resolver import get_model_id_for_kiro
-from kiro.models_openai import ChatMessage, ChatCompletionRequest, Tool
+from kiro.models_openai import ChatMessage, ChatCompletionRequest, ResponseRequest, Tool
 
 # Import from core - reuse shared logic
 from kiro.converters_core import (
@@ -279,10 +279,12 @@ def convert_openai_tools_to_unified(tools: Optional[List[Tool]]) -> Optional[Lis
             ))
         # Flat format compatibility (Cursor-style)
         elif tool.name is not None:
+            # Use input_schema, or fall back to extra 'parameters' field (n8n-style)
+            schema = tool.input_schema or (tool.model_extra or {}).get("parameters")
             unified_tools.append(UnifiedTool(
                 name=tool.name,
                 description=tool.description,
-                input_schema=tool.input_schema
+                input_schema=schema
             ))
         # Skip invalid tools
         else:
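
The fallback above targets flat tool payloads where clients such as n8n send the schema under a `parameters` key that the `Tool` model does not declare. A hypothetical example, assuming `Tool` is a pydantic v2 model configured to allow extra fields (which is what makes `model_extra` carry the undeclared key):

```python
from kiro.models_openai import Tool

flat_tool = {
    "name": "get_weather",
    "description": "Get the current weather for a city",
    # n8n-style: schema under 'parameters' instead of 'input_schema'
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
    },
}

tool = Tool.model_validate(flat_tool)
# The new fallback picks the schema out of model_extra:
schema = tool.input_schema or (tool.model_extra or {}).get("parameters")
assert schema == flat_tool["parameters"]
```
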
@@ -345,4 +347,50 @@ def build_kiro_payload(
         inject_thinking=True
     )
 
     return result.payload
+
+
+def build_kiro_payload_for_response(
+    request_data: ResponseRequest,
+    conversation_id: str,
+    profile_arn: str
+) -> dict:
+    """
+    Builds complete payload for Kiro API from OpenAI ResponseRequest.
+
+    Similar to build_kiro_payload but uses 'input' instead of 'messages'.
+
+    Args:
+        request_data: Request in OpenAI ResponseRequest format
+        conversation_id: Unique conversation ID
+        profile_arn: AWS CodeWhisperer profile ARN
+
+    Returns:
+        Payload dictionary for POST request to Kiro API
+    """
+    # Convert input (instead of messages) to unified format
+    system_prompt, unified_messages = convert_openai_messages_to_unified(request_data.input)
+
+    # Convert tools to unified format
+    unified_tools = convert_openai_tools_to_unified(request_data.tools)
+
+    # Get model ID for Kiro API
+    model_id = get_model_id_for_kiro(request_data.model, HIDDEN_MODELS)
+
+    logger.debug(
+        f"Converting OpenAI ResponseRequest: model={request_data.model} -> {model_id}, "
+        f"messages={len(unified_messages)}, tools={len(unified_tools) if unified_tools else 0}"
+    )
+
+    # Use core function to build payload
+    result = core_build_kiro_payload(
+        messages=unified_messages,
+        system_prompt=system_prompt,
+        model_id=model_id,
+        tools=unified_tools,
+        conversation_id=conversation_id,
+        profile_arn=profile_arn,
+        inject_thinking=True
+    )
+
+    return result.payload
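
Since the function mirrors `build_kiro_payload`, a call looks the same apart from the request type. A sketch, where the model name is illustrative and the empty profile ARN mirrors what the route below passes for non-desktop auth:

```python
from kiro.converters_openai import build_kiro_payload_for_response
from kiro.models_openai import ResponseRequest
from kiro.utils import generate_conversation_id

request_data = ResponseRequest.model_validate({
    "model": "claude-sonnet-4",  # illustrative model name
    "input": [{"role": "user", "content": "ping"}],
})
payload = build_kiro_payload_for_response(
    request_data,
    generate_conversation_id(),
    ""  # profile ARN stays empty unless Kiro desktop auth is in use
)
```
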
35 changes: 35 additions & 0 deletions kiro/models_openai.py
@@ -180,6 +180,41 @@ class ChatCompletionRequest(BaseModel):
     model_config = {"extra": "allow"}
 
 
+class ResponseRequest(BaseModel):
+    """
+    Request for response generation in OpenAI Responses API format.
+
+    Similar to ChatCompletionRequest but uses 'input' instead of 'messages'.
+
+    Attributes:
+        model: Model ID for generation
+        input: List of input messages
+        instructions: System instructions
+        tools: List of available tools
+        temperature: Generation temperature (0-2)
+        top_p: Top-p sampling
+        max_output_tokens: Maximum output tokens
+        truncation: Truncation strategy
+        n: Number of response variants
+        stop: Stop sequences
+        parallel_tool_calls: Whether to allow parallel tool calls
+    """
+    model: str
+    input: Annotated[List[ChatMessage], Field(min_length=1)]
+    instructions: Optional[List[Dict[str, Any]]] = None
+    tools: Optional[List[Tool]] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    max_output_tokens: Optional[int] = None
+    truncation: Optional[str] = None
+    n: Optional[int] = 1
+    stop: Optional[Union[str, List[str]]] = None
+    parallel_tool_calls: Optional[bool] = None
+    stream: Optional[bool] = False
+
+    model_config = {"extra": "allow"}
+
+
 # ==================================================================================================
 # Models for responses
 # ==================================================================================================
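
A minimal request body that should validate against `ResponseRequest`; the model name and message are illustrative, and this assumes `ChatMessage` accepts the usual `role`/`content` pair:

```python
from kiro.models_openai import ResponseRequest

req = ResponseRequest.model_validate({
    "model": "claude-sonnet-4",  # illustrative model name
    "input": [
        {"role": "user", "content": "Summarize this repo in one sentence."}
    ],
    "stream": False,
})
assert req.n == 1 and req.tools is None  # defaults from the model
```
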
184 changes: 183 additions & 1 deletion kiro/routes_openai.py
@@ -24,6 +24,7 @@
 - / and /health: Health check
 - /v1/models: Models list
 - /v1/chat/completions: Chat completions
+- /v1/responses: Responses API (OpenAI-compatible)
 """
 
 import json
@@ -42,11 +43,12 @@
     OpenAIModel,
     ModelList,
     ChatCompletionRequest,
+    ResponseRequest,
 )
 from kiro.auth import KiroAuthManager, AuthType
 from kiro.cache import ModelInfoCache
 from kiro.model_resolver import ModelResolver
-from kiro.converters_openai import build_kiro_payload
+from kiro.converters_openai import build_kiro_payload, build_kiro_payload_for_response
 from kiro.streaming_openai import stream_kiro_to_openai, collect_stream_response, stream_with_first_token_retry
 from kiro.http_client import KiroHttpClient
 from kiro.utils import generate_conversation_id
@@ -150,6 +152,186 @@ async def get_models(request: Request):
     return ModelList(data=openai_models)
 
 
+@router.post("/v1/responses", dependencies=[Depends(verify_api_key)])
+async def responses(request: Request, request_data: ResponseRequest):
+    """
+    Responses endpoint - compatible with OpenAI API.
+
+    Accepts requests in OpenAI Responses API format (with 'input' instead of 'messages')
+    and translates them to Kiro API.
+
+    Args:
+        request: FastAPI Request for accessing app.state
+        request_data: Request in OpenAI ResponseRequest format
+
+    Returns:
+        JSONResponse in Responses API format, or an SSE StreamingResponse when stream=true
+    """
logger.info(f"Request to /v1/responses (model={request_data.model})")
logger.info(f"Request body: {request_data.model_dump_json()}")
logger.info(f"Input messages: {len(request_data.input)}")
logger.info(f"Has tools: {request_data.tools is not None}")
logger.info(f"Temperature: {request_data.temperature}")
logger.info(f"Stream: {request_data.stream}")

auth_manager: KiroAuthManager = request.app.state.auth_manager
model_cache: ModelInfoCache = request.app.state.model_cache

# Generate conversation ID for Kiro API
conversation_id = generate_conversation_id()

# Build payload for Kiro - use input instead of messages
profile_arn_for_payload = ""
if auth_manager.auth_type == AuthType.KIRO_DESKTOP and auth_manager.profile_arn:
profile_arn_for_payload = auth_manager.profile_arn

try:
kiro_payload = build_kiro_payload_for_response(
request_data,
conversation_id,
profile_arn_for_payload
)
logger.info(f"Kiro payload built: {len(str(kiro_payload))} chars")
logger.info(f"Kiro payload model_id: {kiro_payload.get('modelId', 'N/A')}")
logger.info(f"Kiro payload has tools: {'tools' in kiro_payload}")
except ValueError as e:
logger.error(f"Failed to build kiro payload: {e}")
raise HTTPException(status_code=400, detail=str(e))

# Create HTTP client
url = f"{auth_manager.api_host}/generateAssistantResponse"
logger.debug(f"Kiro API URL: {url}")

shared_client = request.app.state.http_client
http_client = KiroHttpClient(auth_manager, shared_client=shared_client)

try:
response = await http_client.request_with_retry(
"POST",
url,
kiro_payload,
stream=True
)

logger.info(f"Kiro API response status: {response.status_code}")

if response.status_code != 200:
try:
error_content = await response.aread()
except Exception:
error_content = b"Unknown error"

await http_client.close()
error_text = error_content.decode('utf-8', errors='replace')

logger.error(f"Kiro API error response: {error_text[:500]}")

error_message = error_text
try:
error_json = json.loads(error_text)
from kiro.kiro_errors import enhance_kiro_error
error_info = enhance_kiro_error(error_json)
error_message = error_info.user_message
logger.debug(f"Original Kiro error: {error_info.original_message}")
except (json.JSONDecodeError, KeyError):
pass

logger.warning(
f"HTTP {response.status_code} - POST /v1/responses - {error_message[:100]}"
)

return JSONResponse(
status_code=response.status_code,
content={
"error": {
"message": error_message,
"type": "kiro_api_error",
"code": response.status_code
}
}
)

# Collect streaming response (Kiro API returns streaming even for non-streaming requests)
chat_completion = await collect_stream_response(
http_client.client,
response,
request_data.model,
model_cache,
auth_manager,
request_messages=[msg.model_dump() for msg in request_data.input],
request_tools=[t.model_dump() for t in request_data.tools] if request_data.tools else None
)

await http_client.close()
logger.info("HTTP 200 - POST /v1/responses - completed")

# Convert ChatCompletion format to Responses API format
output_text = chat_completion.get("choices", [{}])[0].get("message", {}).get("content", "")

if isinstance(output_text, str):
content_blocks = [{"type": "output_text", "text": output_text, "annotations": [], "logprobs": []}]
else:
content_blocks = output_text if output_text else [{"type": "output_text", "text": "", "annotations": [], "logprobs": []}]

response_id = chat_completion.get("id", conversation_id)
model_name = chat_completion.get("model", request_data.model)
created_at = int(datetime.now(timezone.utc).timestamp())
usage = chat_completion.get("usage", {})

responses_response = {
"id": response_id,
"object": "response",
"created_at": created_at,
"model": model_name,
"output": [
{
"id": conversation_id,
"type": "message",
"role": "assistant",
"content": content_blocks if isinstance(content_blocks, list) else [],
"status": "completed"
}
],
"usage": usage,
"status": "completed"
}

# Check if streaming was requested
stream_requested = getattr(request_data, 'stream', False)
if stream_requested:
logger.info("Returning streaming SSE response")
async def sse_generator():
# response.created
yield f"data: {json.dumps({'type': 'response.created', 'response': {**responses_response, 'output': [], 'status': 'in_progress'}})}\n\n"
# response.output_item.added
yield f"data: {json.dumps({'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': conversation_id, 'type': 'message', 'role': 'assistant', 'content': [], 'status': 'in_progress'}})}\n\n"
# response.output_text.delta
full_text = content_blocks[0].get("text", "") if content_blocks else ""
yield f"data: {json.dumps({'type': 'response.output_text.delta', 'output_index': 0, 'content_index': 0, 'delta': full_text})}\n\n"
# response.output_text.done
yield f"data: {json.dumps({'type': 'response.output_text.done', 'output_index': 0, 'content_index': 0, 'text': full_text})}\n\n"
# response.output_item.done
yield f"data: {json.dumps({'type': 'response.output_item.done', 'output_index': 0, 'item': responses_response['output'][0]})}\n\n"
# response.completed
yield f"data: {json.dumps({'type': 'response.completed', 'response': responses_response})}\n\n"
yield "data: [DONE]\n\n"
return StreamingResponse(sse_generator(), media_type="text/event-stream")

logger.info(f"Responses API response: {json.dumps(responses_response, indent=2)}")
logger.info("Returning JSONResponse with Responses API format")
return JSONResponse(content=responses_response)

except HTTPException as e:
await http_client.close()
logger.error(f"HTTP {e.status_code} - POST /v1/responses - {e.detail}")
raise
except Exception as e:
await http_client.close()
logger.error(f"Internal error: {e}", exc_info=True)
logger.error(f"HTTP 500 - POST /v1/responses - {str(e)[:100]}")
raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")


@router.post("/v1/chat/completions", dependencies=[Depends(verify_api_key)])
async def chat_completions(request: Request, request_data: ChatCompletionRequest):
"""
Expand Down
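
End-to-end, the new endpoint can be exercised with a plain HTTP client. A sketch, assuming the proxy listens on localhost:8000 and that `verify_api_key` expects a standard Bearer token (neither is confirmed by this diff):

```python
import httpx

resp = httpx.post(
    "http://localhost:8000/v1/responses",
    headers={"Authorization": "Bearer YOUR_KEY"},  # header scheme is an assumption
    json={
        "model": "claude-sonnet-4",  # illustrative model name
        "input": [{"role": "user", "content": "Hello!"}],
        "stream": False,
    },
    timeout=60.0,
)
data = resp.json()
# Shape produced by the route above (non-streaming path):
print(data["status"])                           # "completed"
print(data["output"][0]["content"][0]["text"])  # assistant text
```

For stream=true, the route replays the collected result as the event sequence response.created, response.output_item.added, response.output_text.delta, response.output_text.done, response.output_item.done, response.completed, followed by a final data: [DONE] line.
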