From 3b3c1903d814f4c95c3c6a7b73c505073dfe7574 Mon Sep 17 00:00:00 2001
From: yzddmr6 <46088090+yzddmr6@users.noreply.github.com>
Date: Tue, 3 Mar 2026 15:36:04 +0800
Subject: [PATCH] feat: add /v1/messages/count_tokens endpoint for Anthropic API

Claude Code calls this endpoint before each request to check conversation
size and decide whether to trigger compaction. Without it, the gateway
returns 404, Claude Code cannot estimate context usage, and long
conversations eventually hit the upstream CONTENT_LENGTH_EXCEEDS_THRESHOLD
error (400).

The endpoint builds the full Kiro payload and counts tokens on the
serialized JSON using tiktoken, consistent with the token counting
approach used in the messages endpoint.

Co-Authored-By: Claude Opus 4.6
---
 kiro/routes_anthropic.py | 52 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/kiro/routes_anthropic.py b/kiro/routes_anthropic.py
index 1bc6bd10..78355c19 100644
--- a/kiro/routes_anthropic.py
+++ b/kiro/routes_anthropic.py
@@ -449,4 +449,54 @@ async def stream_wrapper():
                     "message": f"Internal Server Error: {str(e)}"
                 }
             }
-        )
\ No newline at end of file
+        )
+
+
+@router.post("/v1/messages/count_tokens", dependencies=[Depends(verify_anthropic_api_key)])
+async def count_tokens_endpoint(
+    request: Request,
+    request_data: AnthropicMessagesRequest,
+):
+    """
+    Estimate the input token count of an Anthropic Messages request.
+
+    Implements the Anthropic Count Tokens API, which Claude Code uses to
+    decide when to trigger conversation compaction. The request is converted
+    into the Kiro payload and tokens are counted on its serialized JSON.
+
+    Args:
+        request: Incoming HTTP request; the auth manager is read from
+            request.app.state.auth_manager.
+        request_data: Request body in Anthropic Messages format.
+
+    Returns:
+        JSONResponse carrying {"input_tokens": N} on success, or a 400
+        "invalid_request_error" body when the conversion raises ValueError.
+    """
+    logger.info(f"Request to /v1/messages/count_tokens (model={request_data.model}, messages={len(request_data.messages)})")
+
+    auth: KiroAuthManager = request.app.state.auth_manager
+
+    # The conversation id is used only as an argument to the conversion.
+    conversation_id = generate_conversation_id()
+    # The profile ARN is passed only for Kiro Desktop auth with an ARN set.
+    desktop_with_arn = auth.auth_type == AuthType.KIRO_DESKTOP and auth.profile_arn
+    profile_arn = auth.profile_arn if desktop_with_arn else ""
+
+    try:
+        payload = anthropic_to_kiro(request_data, conversation_id, profile_arn)
+    except ValueError as exc:
+        logger.error(f"Conversion error in count_tokens: {exc}")
+        error_body = {
+            "type": "error",
+            "error": {"type": "invalid_request_error", "message": str(exc)},
+        }
+        return JSONResponse(status_code=400, content=error_body)
+
+    # Tokens are counted on the indented JSON text of the whole payload.
+    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+    token_total = count_tokens(serialized, apply_claude_correction=False)
+
+    logger.info(f"Token count estimate: {token_total} (payload size: {len(serialized)} chars)")
+
+    return JSONResponse(content={"input_tokens": token_total})