Bug Description
When using 9Router to connect Antigravity (or other clients using the Google Vertex AI format) to the Kimi API (k2.6), multi-turn tool calling fails with the following error:
400 Bad Request: thinking is enabled but reasoning_content is missing in assistant tool call message
Root Cause
- Antigravity sends the message history in Google Vertex AI format (`request.contents`).
- When the model previously executed a tool, the history contains a `model` message with a `functionCall` part. However, Antigravity strips or does not preserve the `thought` (reasoning) part for past messages.
- 9Router receives this and converts it to the OpenAI format, generating an `assistant` message with `tool_calls` but no `reasoning_content`.
- In `server.js`, 9Router automatically forces `thinking: {type:"enabled", keep:"all"}` for Kimi models.
- The Kimi API strictly requires that, if `thinking` is enabled, any previous assistant message containing `tool_calls` must also contain `reasoning_content`. Since it is missing, the API rejects the request.
The Solution
We need to intercept the Google Vertex format (request.contents) in kimiSession.js before 9Router converts it to the OpenAI format.
The fix involves:
- Identifying `model` messages that have a `functionCall` but lack a `thought` part.
- Injecting a synthetic `thought` part (using the cached reasoning from the previous SSE stream) right before the `functionCall`.
- When 9Router subsequently converts this patched Google format to the OpenAI format, the injected `thought` is properly mapped to `reasoning_content`, fully satisfying the Kimi API requirements.
Proposed Code Fix
Here is the fully rewritten app/src/mitm/kimiSession.js that implements this fix. It successfully tracks the session, caches the reasoning, and patches the Google format on the fly.
/**
* Kimi MITM - PATCH Google Vertex format directly
* 1. Find request.contents (Google format)
* 2. For each model message with functionCall but NO thought → add thought part with cached reasoning
* 3. Keep thinking: enabled so Kimi works with reasoning
*/
const fs = require('fs');
const LOG_FILE = require('path').join(require('os').homedir(), '.9router', 'kimi_debug.log');
/**
 * Append one timestamped line to the Kimi debug log file.
 *
 * Logging is best-effort: a failed write is ignored so that diagnostics can
 * never break request handling.
 *
 * @param {string} msg - Text to record; a newline is appended automatically.
 */
function fileLog(msg) {
  const stamp = new Date().toLocaleTimeString();
  const entry = `[${stamp}] ${msg}\n`;
  try {
    fs.appendFileSync(LOG_FILE, entry);
  } catch (e) {
    // Deliberately ignored: the debug log is optional.
  }
}
// Maximum idle time (120 minutes, in milliseconds) before a cached session is
// treated as expired by getSession() and pruneExpiredSessions().
const SESSION_TTL_MS = 120 * 60 * 1000;
// In-memory session cache keyed by session key. Entries are written by
// upsertSession() and carry lastAssistantReasoning, createdAt and updatedAt.
const sessionStore = new Map();
/**
 * Decide whether a model identifier refers to a Kimi model.
 *
 * The match is case-insensitive and substring-based.
 *
 * @param {unknown} model - Model name taken from the request.
 * @returns {boolean} True when `model` is a non-empty string containing one
 *   of the known Kimi markers; false for anything else.
 */
function isKimiModel(model) {
  if (typeof model !== "string" || !model) return false;
  const lowered = model.toLowerCase();
  const markers = ["kimi", "k2.6", "if/kimi"];
  return markers.some((marker) => lowered.includes(marker));
}
/**
 * Derive the session-store key for a request.
 *
 * @param {object} req - Incoming request (not consulted).
 * @param {object|null|undefined} parsedBody - Parsed request body.
 * @returns {string} `kimi:<conversationId>` when the body carries a truthy
 *   `conversationId`, otherwise the shared key `kimi:default`.
 */
function getSessionKey(req, parsedBody) {
  const conversationId = parsedBody?.conversationId;
  if (conversationId) {
    return `kimi:${conversationId}`;
  }
  return `kimi:default`;
}
/**
 * Look up a cached session, evicting it if it has outlived the TTL.
 *
 * @param {string} key - Session key as produced by getSessionKey().
 * @returns {object|null} The stored session, or null when there is none or
 *   when the stored one has expired (in which case it is also removed).
 */
function getSession(key) {
  const entry = sessionStore.get(key);
  if (!entry) return null;

  const isExpired = Date.now() - entry.updatedAt > SESSION_TTL_MS;
  if (!isExpired) return entry;

  sessionStore.delete(key);
  return null;
}
/**
 * Create or update a cached session and refresh its `updatedAt` timestamp.
 *
 * A missing session starts from `{ lastAssistantReasoning: null, createdAt }`.
 * The stored object is always a new object; the previous one is not mutated.
 *
 * @param {string} key - Session key.
 * @param {object} patch - Fields to merge over the existing session.
 * @returns {object} The session object now held in the store.
 */
function upsertSession(key, patch) {
  const previous = sessionStore.get(key) || { lastAssistantReasoning: null, createdAt: Date.now() };
  const next = Object.assign({}, previous, patch, { updatedAt: Date.now() });
  sessionStore.set(key, next);

  const reasoningLength = (next.lastAssistantReasoning || "").length;
  fileLog(`[SESSION] saved reasoning len=${reasoningLength}`);
  return next;
}
/**
 * Return the parts array of a Google-format history entry, but only when the
 * entry is a `model` message. Supports `item.parts`, `item.content.parts`
 * and an array-valued `item.content`.
 *
 * @param {object|null|undefined} item - One entry of `contents`.
 * @returns {Array|null} The parts array, or null when not applicable.
 */
function getKimiModelParts(item) {
  if (!item || item.role !== "model") return null;
  let parts = null;
  if (item.parts) parts = item.parts;
  else if (item.content?.parts) parts = item.content.parts;
  else if (Array.isArray(item.content)) parts = item.content;
  return Array.isArray(parts) ? parts : null;
}

/**
 * Build the upstream Kimi request body from a Google Vertex style request.
 *
 * Every `model` message in the history that contains a `functionCall` part
 * but no `thought` part gets a synthetic `{ thought: true, text }` part
 * inserted directly before its first `functionCall`. The text is the
 * reasoning cached for this session, or the placeholder "thinking..." when
 * nothing is cached, so the message is patched either way. `thinking` is
 * always forced to `{ type: "enabled", keep: "all" }`.
 *
 * Note: the history arrays inside `parsedBody` are patched in place; the
 * returned body is a shallow copy that shares them.
 *
 * @param {object} parsedBody - Parsed request body; the history is read from
 *   `request.contents` or, failing that, `contents`.
 * @param {object} req - Incoming request, forwarded to getSessionKey().
 * @param {Function} [logFn] - Accepted for interface compatibility; unused.
 * @returns {Promise<{body: object, sessionKey: string}>} The patched body and
 *   the session key it was built for.
 */
async function buildKimiBody(parsedBody, req, logFn) {
  const sk = getSessionKey(req, parsedBody);
  const session = getSession(sk);
  const lastReasoning = session?.lastAssistantReasoning;

  // Find the history in Google Vertex format (wrapped or bare).
  let contents = null;
  if (Array.isArray(parsedBody?.request?.contents)) {
    contents = parsedBody.request.contents;
  } else if (Array.isArray(parsedBody?.contents)) {
    contents = parsedBody.contents;
  }

  const body = { ...parsedBody, thinking: { type: "enabled", keep: "all" } };

  if (!contents) {
    fileLog(`[BUILD] no request.contents found`);
    return { body, sessionKey: sk };
  }

  fileLog(`[BUILD] google contents=${contents.length} hasCachedReasoning=${!!lastReasoning}`);

  // Patch even without cached reasoning: an unpatched tool-call message is
  // what triggers the upstream 400, so fall back to a placeholder thought.
  const thoughtText = lastReasoning || "thinking...";
  let patchedCount = 0;
  contents.forEach((item, i) => {
    const parts = getKimiModelParts(item);
    if (!parts) return;

    const fcIndex = parts.findIndex((p) => p?.functionCall);
    if (fcIndex === -1) return;
    if (parts.some((p) => p?.thought === true)) return;

    // The thought must precede the functionCall it belongs to.
    parts.splice(fcIndex, 0, { thought: true, text: thoughtText });
    patchedCount++;
    fileLog(`[BUILD] patched msg[${i}] - added thought before functionCall`);
  });
  fileLog(`[BUILD] patched ${patchedCount} model messages`);

  return { body, sessionKey: sk };
}
/**
 * Scan one SSE line for reasoning text and tool calls, accumulating into
 * `state`. Understands both Google (`candidates[].content.parts[]`) and
 * OpenAI (`choices[].delta`) payloads. Lines that are not `data: ` events,
 * are empty, or are `[DONE]` are skipped.
 *
 * @param {string} line - A single line of the stream, without its newline.
 * @param {{reasoning: string, hasToolCalls: boolean}} state - Accumulator.
 */
function scanKimiSseLine(line, state) {
  if (!line.startsWith("data: ")) return;
  const data = line.slice(6).trim();
  if (!data || data === "[DONE]") return;

  try {
    const payload = JSON.parse(data);

    // Google format.
    const candidates = payload?.response?.candidates || payload?.candidates || [];
    for (const cand of candidates) {
      const parts = cand?.content?.parts || [];
      for (const part of parts) {
        if (part.thought === true && typeof part.text === "string") {
          state.reasoning += part.text;
        } else if (part.functionCall) {
          state.hasToolCalls = true;
        }
      }
    }

    // OpenAI format.
    const choices = payload?.choices || [];
    for (const choice of choices) {
      const delta = choice.delta || {};
      const chunk = delta.reasoning_content || delta.reasoning;
      if (chunk) state.reasoning += chunk;
      if (delta.tool_calls) state.hasToolCalls = true;
    }
  } catch (e) {
    // Not JSON or an unexpected shape: the line is still forwarded verbatim
    // by the caller, it just contributes nothing to the cached reasoning.
  }
}

/**
 * Relay an upstream streaming response to the client while capturing the
 * model's reasoning so it can be cached for the next turn.
 *
 * The stream is forwarded line by line. When it ends, any final line that
 * was not newline-terminated is scanned and forwarded as-is rather than
 * dropped. If reasoning text was seen, it is stored via upsertSession()
 * under `sk`.
 *
 * @param {object} resp - Upstream fetch-style response (`headers.get`,
 *   `body.getReader()`, `text()`).
 * @param {object} res - Client response (`writeHead`, `write`, `end`).
 * @param {string} sk - Session key; when falsy nothing is cached.
 * @param {string} [model] - Accepted for interface compatibility; unused.
 * @param {Function} [logFn] - Accepted for interface compatibility; unused.
 * @returns {Promise<void>}
 */
async function handleKimiSSE(resp, res, sk, model, logFn) {
  const ctType = resp.headers.get("content-type") || "application/json";
  res.writeHead(200, { "Content-Type": ctType, "Cache-Control": "no-cache", Connection: "keep-alive" });

  if (!resp.body) {
    const txt = await resp.text().catch(() => "");
    res.end(txt);
    return;
  }

  const rdr = resp.body.getReader();
  const dec = new TextDecoder();
  const state = { reasoning: "", hasToolCalls: false };
  let buf = "";

  try {
    for (;;) {
      const { done, value } = await rdr.read();
      if (done) break;
      buf += dec.decode(value, { stream: true });
      const lines = buf.split("\n");
      // The last element is an incomplete line; keep it for the next chunk.
      buf = lines.pop();
      for (const ln of lines) {
        scanKimiSseLine(ln, state);
        res.write(ln + "\n");
      }
    }

    // Flush bytes held back by the streaming decoder, then the final line
    // if the upstream did not terminate it with a newline.
    buf += dec.decode();
    if (buf) {
      scanKimiSseLine(buf, state);
      res.write(buf);
    }

    res.end();
    fileLog(`[SSE_EXIT] rcLen=${state.reasoning.length} hasTC=${state.hasToolCalls}`);
    if (sk && state.reasoning) {
      upsertSession(sk, { lastAssistantReasoning: state.reasoning });
    }
  } catch (err) {
    fileLog(`[SSE] error: ${err.message}`);
    if (!res.writableEnded) res.end();
  }
}
module.exports = {
isKimiModel,
buildKimiBody,
handleKimiSSE,
getSessionKey,
pruneExpiredSessions: () => {
const now = Date.now();
for (const [key, s] of sessionStore) {
if (now - s.updatedAt > SESSION_TTL_MS) sessionStore.delete(key);
}
},
PRUNE_INTERVAL_MS: 600000
};
Bug Description
When using 9Router to connect Antigravity (or other clients using the Google Vertex AI format) to the Kimi API (k2.6), multi-turn tool calling fails with the following error:
400 Bad Request: thinking is enabled but reasoning_content is missing in assistant tool call message
Root Cause
- Antigravity sends the message history in Google Vertex AI format (`request.contents`).
- When the model previously executed a tool, the history contains a `model` message with a `functionCall` part. However, Antigravity strips or does not preserve the `thought` (reasoning) part for past messages.
- 9Router receives this and converts it to the OpenAI format, generating an `assistant` message with `tool_calls` but no `reasoning_content`.
- In `server.js`, 9Router automatically forces `thinking: {type:"enabled", keep:"all"}` for Kimi models.
- The Kimi API strictly requires that, if `thinking` is enabled, any previous assistant message containing `tool_calls` must also contain `reasoning_content`. Since it is missing, the API rejects the request.
The Solution
We need to intercept the Google Vertex format (`request.contents`) in `kimiSession.js` before 9Router converts it to the OpenAI format.
The fix involves:
- Identifying `model` messages that have a `functionCall` but lack a `thought` part.
- Injecting a synthetic `thought` part (using the cached reasoning from the previous SSE stream) right before the `functionCall`.
- When 9Router subsequently converts this patched Google format to the OpenAI format, the injected `thought` is properly mapped to `reasoning_content`, fully satisfying the Kimi API requirements.
Proposed Code Fix
Here is the fully rewritten `app/src/mitm/kimiSession.js` that implements this fix. It successfully tracks the session, caches the reasoning, and patches the Google format on the fly.