jwadow · jixuan1989 · Mar 15, 2026 · Mar 15, 2026
diff --git a/README.md b/README.md
@@ -430,6 +430,7 @@ Leave `VPN_PROXY_URL` empty (default) if you don't need proxy support.
 | `/` | GET | Health check |
 | `/health` | GET | Detailed health check |
 | `/v1/models` | GET | List available models |
+| `/v1/usage` | GET | Current Kiro plan and usage limits |
 | `/v1/chat/completions` | POST | OpenAI Chat Completions API |
 | `/v1/messages` | POST | Anthropic Messages API |
 
@@ -457,6 +458,32 @@ curl http://localhost:8000/v1/chat/completions \
 
 </details>
 
+<details>
+<summary>📊 Check Usage Limits</summary>
+
+```bash
+curl "http://localhost:8000/v1/usage" \
+  -H "Authorization: Bearer my-super-secret-password-123"
+```
+
+The endpoint proxies CodeWhisperer Runtime `GetUsageLimits`, preserves the upstream JSON,
+and adds a derived `usageSummary` block with normalized key fields such as reset time,
+primary quota usage, and free trial usage.
+
+Default query parameters:
+- `origin=AI_EDITOR`
+- `resource_type=AGENTIC_REQUEST`
+- `is_email_required=true`
+
+Custom example:
+
+```bash
+curl "http://localhost:8000/v1/usage?origin=AI_EDITOR&resource_type=AGENTIC_REQUEST&is_email_required=false" \
+  -H "Authorization: Bearer my-super-secret-password-123"
+```
+
+</details>
+
 <details>
 <summary>🔹 Streaming Request</summary>
 

diff --git a/docs/en/ARCHITECTURE.md b/docs/en/ARCHITECTURE.md
@@ -385,6 +385,7 @@ Supports async context manager (`async with`).
 | `/` | GET | Health check (status, message, version) |
 | `/health` | GET | Detailed health check (status, timestamp, version) |
 | `/v1/models` | GET | List of available models (requires API key) |
+| `/v1/usage` | GET | Current Kiro plan and usage limits (requires API key) |
 | `/v1/chat/completions` | POST | Chat completions (requires API key) |
 
 **Authentication:** Bearer token in `Authorization` header
@@ -666,10 +667,22 @@ TOOL_DESCRIPTION_MAX_LENGTH="10000"
 | Endpoint | Method | Description |
 |----------|--------|-------------|
 | `/v1/models` | GET | List of available models |
+| `/v1/usage` | GET | Current Kiro plan and usage limits |
 | `/v1/chat/completions` | POST | Chat completions (streaming/non-streaming) |
 
 **Authentication:** `Authorization: Bearer {PROXY_API_KEY}`
 
+`/v1/usage` proxies CodeWhisperer Runtime `GetUsageLimits` using:
+- `origin=AI_EDITOR` by default
+- `resource_type=AGENTIC_REQUEST` by default
+- `is_email_required=true` by default
+
+The route also accepts those values as query parameters when callers need to override them.
+
+The gateway preserves the upstream response body and adds a derived `usageSummary`
+block so clients can read normalized reset/free-trial values without parsing the
+nested runtime payload shape.
+
 ### 7.3 Anthropic-compatible Endpoints
 
 | Endpoint | Method | Description |

diff --git a/docs/zh/README.md b/docs/zh/README.md
@@ -430,6 +430,7 @@ VPN_PROXY_URL=192.168.1.100:8080
 | `/` | GET | 健康检查 |
 | `/health` | GET | 详细健康检查 |
 | `/v1/models` | GET | 列出可用模型 |
+| `/v1/usage` | GET | 当前 Kiro 套餐与用量限制 |
 | `/v1/chat/completions` | POST | OpenAI Chat Completions API |
 | `/v1/messages` | POST | Anthropic Messages API |
 
@@ -457,6 +458,32 @@ curl http://localhost:8000/v1/chat/completions \
 
 </details>
 
+<details>
+<summary>📊 查询用量限制</summary>
+
+```bash
+curl "http://localhost:8000/v1/usage" \
+  -H "Authorization: Bearer my-super-secret-password-123"
+```
+
+该端点会代理 CodeWhisperer Runtime 的 `GetUsageLimits`，保留上游原始 JSON，
+并额外补充一个 `usageSummary` 摘要字段，方便直接读取重置时间、主额度用量和
+免费试用（Free trial）用量等关键信息。
+
+默认查询参数：
+- `origin=AI_EDITOR`
+- `resource_type=AGENTIC_REQUEST`
+- `is_email_required=true`
+
+自定义参数示例：
+
+```bash
+curl "http://localhost:8000/v1/usage?origin=AI_EDITOR&resource_type=AGENTIC_REQUEST&is_email_required=false" \
+  -H "Authorization: Bearer my-super-secret-password-123"
+```
+
+</details>
+
 <details>
 <summary>🔹 流式请求</summary>
 

diff --git a/kiro/http_client.py b/kiro/http_client.py
@@ -31,7 +31,7 @@
 """
 
 import asyncio
-from typing import Optional
+from typing import Any, Optional
 
 import httpx
 from fastapi import HTTPException
@@ -170,8 +170,9 @@ async def request_with_retry(
         self,
         method: str,
         url: str,
-        json_data: dict,
-        stream: bool = False
+        json_data: Optional[dict[str, Any]] = None,
+        stream: bool = False,
+        params: Optional[dict[str, Any]] = None,
     ) -> httpx.Response:
         """
         Executes an HTTP request with retry logic.
@@ -188,9 +189,10 @@ async def request_with_retry(
         Args:
             method: HTTP method (GET, POST, etc.)
             url: Request URL
-            json_data: Request body (JSON)
+            json_data: Optional request body (JSON)
             stream: Use streaming (default False)
-
+            params: Optional query parameters
+
         Returns:
             httpx.Response with successful response
 
@@ -214,12 +216,24 @@ async def request_with_retry(
                 if stream:
                     # Prevent CLOSE_WAIT connection leak (issue #38)
                     headers["Connection"] = "close"
-                    req = client.build_request(method, url, json=json_data, headers=headers)
+                    req = client.build_request(
+                        method,
+                        url,
+                        params=params,
+                        json=json_data,
+                        headers=headers,
+                    )
                     logger.debug("Sending request to Kiro API...")
                     response = await client.send(req, stream=True)
                 else:
                     logger.debug("Sending request to Kiro API...")
-                    response = await client.request(method, url, json=json_data, headers=headers)
+                    response = await client.request(
+                        method,
+                        url,
+                        params=params,
+                        json=json_data,
+                        headers=headers,
+                    )
 
                 # Check status
                 if response.status_code == 200:
@@ -323,4 +337,4 @@ async def __aenter__(self) -> "KiroHttpClient":
 
     async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
         """Closes the client when exiting context."""
-        await self.close()
+        await self.close()
diff --git a/kiro/routes_openai.py b/kiro/routes_openai.py
@@ -23,6 +23,7 @@
 Contains all API endpoints:
 - / and /health: Health check
 - /v1/models: Models list
+- /v1/usage: Current Kiro usage and plan limits
 - /v1/chat/completions: Chat completions
 """
 
@@ -49,6 +50,7 @@
 from kiro.converters_openai import build_kiro_payload
 from kiro.streaming_openai import stream_kiro_to_openai, collect_stream_response, stream_with_first_token_retry
 from kiro.http_client import KiroHttpClient
+from kiro.runtime_usage import fetch_usage_limits
 from kiro.utils import generate_conversation_id
 
 # Import debug_logger
@@ -150,6 +152,48 @@ async def get_models(request: Request):
     return ModelList(data=openai_models)
 
 
+@router.get("/v1/usage", dependencies=[Depends(verify_api_key)])
+async def get_usage(
+    request: Request,
+    origin: str = "AI_EDITOR",
+    resource_type: str = "AGENTIC_REQUEST",
+    is_email_required: bool = True,
+):
+    """
+    Return current Kiro usage and plan limits by delegating to `fetch_usage_limits`.
+
+    Args:
+        request: FastAPI Request; app.state supplies auth_manager and http_client
+        origin: Upstream runtime origin query parameter (forwarded unchanged)
+        resource_type: Upstream runtime resource type query parameter (forwarded unchanged)
+        is_email_required: Whether upstream should include user email (logged as include_email)
+
+    Returns:
+        Upstream GetUsageLimits payload plus a derived `usageSummary` block
+
+    Raises:
+        HTTPException: 400 when `fetch_usage_limits` raises ValueError; other errors propagate
+    """
+    logger.info(
+        "Request to /v1/usage "
+        f"(origin={origin}, resource_type={resource_type}, include_email={is_email_required})"
+    )
+
+    auth_manager: KiroAuthManager = request.app.state.auth_manager
+    shared_client = request.app.state.http_client  # client object stored on app.state
+
+    try:
+        return await fetch_usage_limits(
+            auth_manager=auth_manager,
+            shared_client=shared_client,
+            origin=origin,
+            resource_type=resource_type,
+            is_email_required=is_email_required,
+        )
+    except ValueError as exc:  # presumably rejected parameter values; confirm in kiro.runtime_usage
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+
+
 @router.post("/v1/chat/completions", dependencies=[Depends(verify_api_key)])
 async def chat_completions(request: Request, request_data: ChatCompletionRequest):
     """
@@ -418,4 +462,4 @@ async def stream_wrapper():
         # Flush debug logs on internal error ("errors" mode)
         if debug_logger:
             debug_logger.flush_on_error(500, str(e))
-        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")