Skip to content

Fix 400 Bad Request (missing reasoning_content) for Kimi 2.6 model with Google Vertex clients (Antigravity) #735

@mecrol

Description

@mecrol

Bug Description

When using 9Router to connect Antigravity (or other clients using the Google Vertex AI format) to the Kimi API (k2.6), multi-turn tool calling fails with the following error:

400 Bad Request: thinking is enabled but reasoning_content is missing in assistant tool call message

Root Cause

  1. Antigravity sends the message history in Google Vertex AI format (request.contents).
  2. When the model previously executed a tool, the history contains a model message with a functionCall part. However, Antigravity strips or does not preserve the thought (reasoning) part for past messages.
  3. 9Router receives this and converts it to the OpenAI format, generating an assistant message with tool_calls but no reasoning_content.
  4. In server.js, 9Router automatically forces thinking: {type:"enabled", keep:"all"} for Kimi models.
  5. The Kimi API strictly requires that if thinking is enabled, any previous assistant messages containing tool_calls must also contain reasoning_content. Since it's missing, the API rejects the request.

The Solution

We need to intercept the Google Vertex format (request.contents) in kimiSession.js before 9Router converts it to the OpenAI format.

The fix involves:

  1. Identifying model messages that have a functionCall but lack a thought part.
  2. Injecting a synthetic thought part (using the cached reasoning from the previous SSE stream) right before the functionCall.
  3. When 9Router subsequently converts this patched Google format to OpenAI format, the injected thought is properly mapped to reasoning_content, fully satisfying the Kimi API requirements.

Proposed Code Fix

Here is the fully rewritten app/src/mitm/kimiSession.js that implements this fix. It successfully tracks the session, caches the reasoning, and patches the Google format on the fly.

/**
 * Kimi MITM - PATCH Google Vertex format directly
 * 1. Find request.contents (Google format)
 * 2. For each model message with functionCall but NO thought → add thought part with cached reasoning
 * 3. Keep thinking: enabled so Kimi works with reasoning
 */
const fs = require('fs');
const LOG_FILE = require('path').join(require('os').homedir(), '.9router', 'kimi_debug.log');

/**
 * Append one timestamped line to the Kimi debug log file (LOG_FILE).
 * Logging is best-effort: a failed write is deliberately ignored.
 *
 * @param {string} msg - Text placed after the `[time]` prefix.
 */
function fileLog(msg) {
  const stamp = new Date().toLocaleTimeString();
  const entry = `[${stamp}] ${msg}\n`;
  try {
    fs.appendFileSync(LOG_FILE, entry);
  } catch (ignored) {
    // Intentionally empty: a logging failure must not surface to the caller.
  }
}

// A session that has not been updated for two hours (in milliseconds) is expired.
const SESSION_TTL_MS = 2 * 60 * 60 * 1000;
// In-memory session records, keyed by the string returned from getSessionKey.
const sessionStore = new Map();

/**
 * Report whether a model identifier refers to a Kimi model.
 * The match is a case-insensitive substring check against known markers.
 *
 * @param {*} model - Candidate model name; anything but a non-empty string yields false.
 * @returns {boolean} True when the lowercased name contains one of the markers.
 */
function isKimiModel(model) {
  if (typeof model !== "string" || model === "") return false;
  const lowered = model.toLowerCase();
  const markers = ["kimi", "k2.6", "if/kimi"];
  return markers.some((marker) => lowered.includes(marker));
}

/**
 * Derive the session-store key for a request.
 *
 * @param {*} req - Incoming request; not consulted by this function.
 * @param {object|null|undefined} parsedBody - Parsed request body, may be absent.
 * @returns {string} `kimi:<conversationId>` when the body carries a truthy
 *   conversationId, otherwise the shared `kimi:default` key.
 */
function getSessionKey(req, parsedBody) {
  const conversationId = parsedBody?.conversationId;
  if (!conversationId) {
    return `kimi:default`;
  }
  return `kimi:${conversationId}`;
}

/**
 * Look up a live session record.
 * A record whose last update is older than SESSION_TTL_MS is removed from the
 * store on access and reported as missing.
 *
 * @param {string} key - Session key as produced by getSessionKey.
 * @returns {object|null} The stored record, or null when absent or expired.
 */
function getSession(key) {
  const record = sessionStore.get(key);
  if (record) {
    const idleMs = Date.now() - record.updatedAt;
    if (idleMs > SESSION_TTL_MS) {
      sessionStore.delete(key);
    } else {
      return record;
    }
  }
  return null;
}

/**
 * Create or update a session record and store it under `key`.
 * The stored value is always a new object: the previous record (or a blank
 * one with `createdAt` set to now) overlaid with `patch`, and `updatedAt`
 * refreshed to the current time.
 *
 * @param {string} key - Session key.
 * @param {object} patch - Fields to merge over the existing record.
 * @returns {object} The record that was written to the store.
 */
function upsertSession(key, patch) {
  let base = sessionStore.get(key);
  if (!base) {
    base = { lastAssistantReasoning: null, createdAt: Date.now() };
  }
  const merged = { ...base, ...patch, updatedAt: Date.now() };
  sessionStore.set(key, merged);

  const reasoningLength = (merged.lastAssistantReasoning || "").length;
  fileLog(`[SESSION] saved reasoning len=${reasoningLength}`);
  return merged;
}

/** Reasoning text injected when the session has no cached reasoning yet. */
const KIMI_FALLBACK_REASONING = "thinking...";

/**
 * Locate the Google Vertex style message array in a request body.
 *
 * @param {object|null|undefined} parsedBody - Parsed request body.
 * @returns {Array|null} `request.contents` if it is an array, else `contents`
 *   if it is an array, else null.
 */
function findVertexContents(parsedBody) {
  const nested = parsedBody?.request?.contents;
  if (Array.isArray(nested)) return nested;
  const direct = parsedBody?.contents;
  return Array.isArray(direct) ? direct : null;
}

/**
 * Return the parts array of one message, trying the three layouts this module
 * accepts: `item.parts`, `item.content.parts`, and an array-valued `item.content`.
 *
 * @param {object} item - One entry of the contents array.
 * @returns {Array|null} The first layout that is an array, or null.
 */
function getModelParts(item) {
  const layouts = [item.parts, item.content?.parts, item.content];
  return layouts.find((candidate) => Array.isArray(candidate)) ?? null;
}

/**
 * Insert a thought part in front of the first functionCall of every model
 * message that has a functionCall but no thought part. Mutates the parts
 * arrays in place.
 *
 * @param {Array} contents - Message array from the request.
 * @param {string} reasoning - Text for the injected thought part.
 * @returns {number[]} Indexes (into `contents`) of the messages that were patched.
 */
function injectMissingThoughts(contents, reasoning) {
  const patchedIndexes = [];
  contents.forEach((item, index) => {
    // Optional chaining: a null/primitive entry is skipped instead of throwing.
    if (item?.role !== "model") return;
    const parts = getModelParts(item);
    if (!parts) return;
    const fcIndex = parts.findIndex((part) => part?.functionCall);
    if (fcIndex === -1) return;
    if (parts.some((part) => part?.thought === true)) return;
    parts.splice(fcIndex, 0, { thought: true, text: reasoning });
    patchedIndexes.push(index);
  });
  return patchedIndexes;
}

/**
 * Prepare a request body for the Kimi API.
 *
 * When the body carries Google Vertex style `contents`, every model message
 * that has a functionCall part but no thought part gets a synthetic thought
 * part inserted before the functionCall. The text is the reasoning cached for
 * the session, or a short placeholder when nothing is cached. Injecting even
 * without cached reasoning is deliberate: thinking is always forced on below,
 * so an unpatched tool-call message would still lack reasoning.
 *
 * The message parts are patched in place, so the caller's `parsedBody` is
 * modified; the returned body is a shallow copy sharing those arrays.
 *
 * @param {object|null|undefined} parsedBody - Parsed incoming request body.
 * @param {*} req - Incoming request, forwarded to getSessionKey.
 * @param {*} logFn - Accepted for interface compatibility; not used here.
 * @returns {Promise<{body: object, sessionKey: string}>} The body with
 *   `thinking: { type: "enabled", keep: "all" }` set, and the session key.
 */
async function buildKimiBody(parsedBody, req, logFn) {
  const sk = getSessionKey(req, parsedBody);
  const lastReasoning = getSession(sk)?.lastAssistantReasoning;

  const contents = findVertexContents(parsedBody);
  if (!contents) {
    fileLog(`[BUILD] no request.contents found`);
  } else {
    fileLog(`[BUILD] google contents=${contents.length} hasCachedReasoning=${!!lastReasoning}`);
    const patchedIndexes = injectMissingThoughts(contents, lastReasoning || KIMI_FALLBACK_REASONING);
    for (const index of patchedIndexes) {
      fileLog(`[BUILD] patched msg[${index}] - added thought before functionCall`);
    }
    fileLog(`[BUILD] patched ${patchedIndexes.length} model messages`);
  }

  const body = {
    ...parsedBody,
    thinking: { type: "enabled", keep: "all" }
  };

  return { body, sessionKey: sk };
}

/**
 * Extract reasoning text and tool-call presence from a single SSE line.
 * Both Google style (`candidates[].content.parts[]`) and OpenAI style
 * (`choices[].delta`) payloads are inspected. Lines that are not `data:`
 * fields, the `[DONE]` sentinel, and unparseable payloads contribute nothing.
 *
 * @param {string} line - One line of the event stream, without its newline.
 * @returns {{reasoning: string, hasToolCalls: boolean}} What the line carried.
 */
function scanKimiSseLine(line) {
  const found = { reasoning: "", hasToolCalls: false };
  // The space after "data:" is optional in the event-stream format.
  if (!line.startsWith("data:")) return found;
  const payload = line.slice(5).trim();
  if (!payload || payload === "[DONE]") return found;

  try {
    const event = JSON.parse(payload);

    const candidates = event?.response?.candidates || event?.candidates || [];
    for (const candidate of candidates) {
      for (const part of candidate?.content?.parts || []) {
        if (part?.thought === true && typeof part.text === "string") {
          found.reasoning += part.text;
        } else if (part?.functionCall) {
          found.hasToolCalls = true;
        }
      }
    }

    for (const choice of event?.choices || []) {
      const delta = choice?.delta || {};
      const chunk = delta.reasoning_content || delta.reasoning;
      if (typeof chunk === "string") found.reasoning += chunk;
      if (delta.tool_calls) found.hasToolCalls = true;
    }
  } catch (ignored) {
    // Best-effort sniffing: a malformed payload is still forwarded verbatim by
    // the caller, it just contributes no further reasoning.
  }
  return found;
}

/**
 * Relay an upstream streaming response to the client while collecting the
 * model's reasoning text, then cache that reasoning on the session.
 *
 * Every upstream line is written to `res` unchanged. When the stream ends,
 * any text after the last newline is also forwarded and scanned, so a final
 * event without a trailing newline is not lost. Reasoning is stored via
 * upsertSession only when the stream completes without error and `sk` and the
 * collected text are both non-empty.
 *
 * @param {object} resp - Upstream response exposing `headers.get`, `body.getReader()` and `text()`.
 * @param {object} res - Client response exposing `writeHead`, `write`, `end`, `writableEnded`.
 * @param {string} sk - Session key under which reasoning is cached.
 * @param {*} model - Accepted for interface compatibility; not used here.
 * @param {*} logFn - Accepted for interface compatibility; not used here.
 * @returns {Promise<void>}
 */
async function handleKimiSSE(resp, res, sk, model, logFn) {
  const ctType = resp.headers.get("content-type") || "application/json";
  res.writeHead(200, { "Content-Type": ctType, "Cache-Control": "no-cache", Connection: "keep-alive" });

  if (!resp.body) {
    const txt = await resp.text().catch(() => "");
    res.end(txt);
    return;
  }

  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  let pending = "";
  let reasoning = "";
  let hasToolCalls = false;

  const absorb = (line) => {
    const found = scanKimiSseLine(line);
    reasoning += found.reasoning;
    if (found.hasToolCalls) hasToolCalls = true;
  };

  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split("\n");
      // The last element is an incomplete line; keep it for the next chunk.
      pending = lines.pop();

      for (const line of lines) {
        absorb(line);
        res.write(line + "\n");
      }
    }

    // Flush bytes still buffered in the decoder, then forward whatever is
    // left after the final newline exactly as received (no newline added).
    pending += decoder.decode();
    if (pending) {
      absorb(pending);
      res.write(pending);
    }
    res.end();

    fileLog(`[SSE_EXIT] rcLen=${reasoning.length} hasTC=${hasToolCalls}`);
    if (sk && reasoning) {
      upsertSession(sk, { lastAssistantReasoning: reasoning });
    }
  } catch (err) {
    fileLog(`[SSE] error: ${err?.message}`);
    try {
      // Stop pulling from upstream once relaying has failed.
      await reader.cancel();
    } catch (ignored) {
      // The stream may already be closed or errored; nothing more to release.
    }
    if (!res.writableEnded) res.end();
  }
}

module.exports = {
  isKimiModel,
  buildKimiBody,
  handleKimiSSE,
  getSessionKey,
  pruneExpiredSessions: () => {
    const now = Date.now();
    for (const [key, s] of sessionStore) {
      if (now - s.updatedAt > SESSION_TTL_MS) sessionStore.delete(key);
    }
  },
  PRUNE_INTERVAL_MS: 600000
};

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions