Fix 400 Bad Request (missing reasoning_content) for Kimi 2.6 model with Google Vertex clients (Antigravity)

# Bug Description
When using 9Router to connect Antigravity (or other clients using the Google Vertex AI format) to the Kimi API (k2.6), multi-turn tool calling fails with the following error:

`400 Bad Request: thinking is enabled but reasoning_content is missing in assistant tool call message`

## Root Cause
1. Antigravity sends the message history in Google Vertex AI format (`request.contents`). 
2. When the model previously executed a tool, the history contains a `model` message with a `functionCall` part. However, Antigravity does not preserve the `thought` (reasoning) part of past messages.
3. 9Router receives this and converts it to the OpenAI format, generating an `assistant` message with `tool_calls` but **no `reasoning_content`**.
4. In `server.js`, 9Router automatically forces `thinking: {type:"enabled", keep:"all"}` for Kimi models.
5. The Kimi API strictly requires that, when `thinking` is enabled, every previous assistant message containing `tool_calls` **must** also contain `reasoning_content`. Because it is missing, the API rejects the request.

## The Solution
We need to intercept the Google Vertex format (`request.contents`) in `kimiSession.js` *before* 9Router converts it to the OpenAI format. 

The fix involves:
1. Identifying `model` messages that have a `functionCall` but lack a `thought` part.
2. Injecting a synthetic `thought` part (using the cached reasoning from the previous SSE stream) right before the `functionCall`.
3. When 9Router subsequently converts this patched Google format to OpenAI format, the injected `thought` is properly mapped to `reasoning_content`, fully satisfying the Kimi API requirements.

## Proposed Code Fix
Here is the fully rewritten `app/src/mitm/kimiSession.js` that implements this fix. It successfully tracks the session, caches the reasoning, and patches the Google format on the fly.

```javascript
/**
 * Kimi MITM - PATCH Google Vertex format directly
 * 1. Find request.contents (Google format)
 * 2. For each model message with functionCall but NO thought → add thought part with cached reasoning
 * 3. Keep thinking: enabled so Kimi works with reasoning
 */
const fs = require('fs');
const LOG_FILE = require('path').join(require('os').homedir(), '.9router', 'kimi_debug.log');

/**
 * Appends a timestamped line to the debug log file (LOG_FILE).
 *
 * Logging is best-effort: any error raised while writing is ignored so that
 * diagnostics can never interrupt request handling.
 *
 * @param {string} msg - Text to record after the timestamp.
 * @returns {void}
 */
function fileLog(msg) {
  const stamp = new Date().toLocaleTimeString();
  const entry = `[${stamp}] ${msg}\n`;
  try {
    fs.appendFileSync(LOG_FILE, entry);
  } catch (ignored) {
    // Deliberately swallowed: the log is a debugging aid only.
  }
}

// Maximum age (120 minutes, in milliseconds) measured from a session's
// `updatedAt`; older sessions are dropped by getSession / pruneExpiredSessions.
const SESSION_TTL_MS = 120 * 60 * 1000;
// In-memory session store, keyed by the string produced by getSessionKey.
const sessionStore = new Map();

/**
 * Tells whether a model identifier refers to a Kimi model.
 *
 * The check is a case-insensitive substring match against the markers
 * "kimi", "k2.6" and "if/kimi".
 *
 * @param {*} model - Model identifier; anything that is not a non-empty string yields false.
 * @returns {boolean} true when one of the markers occurs in the identifier.
 */
function isKimiModel(model) {
  if (typeof model !== "string" || model === "") {
    return false;
  }
  const lowered = model.toLowerCase();
  const markers = ["kimi", "k2.6", "if/kimi"];
  return markers.some((marker) => lowered.includes(marker));
}

/**
 * Builds the session-store key for a request.
 *
 * @param {*} req - Incoming request; not consulted by this function.
 * @param {object|null|undefined} parsedBody - Parsed request body.
 * @returns {string} `kimi:<conversationId>` when the body carries a truthy
 *   `conversationId`, otherwise the shared key `kimi:default`.
 */
function getSessionKey(req, parsedBody) {
  const conversationId = parsedBody?.conversationId;
  if (conversationId) {
    return `kimi:${conversationId}`;
  }
  // NOTE(review): every request without a conversationId shares this one key,
  // so their cached state is shared as well; confirm that this is intended.
  return `kimi:default`;
}

/**
 * Looks up a stored session and evicts it when it has outlived SESSION_TTL_MS.
 *
 * @param {string} key - Session key (see getSessionKey).
 * @returns {object|null} The stored session object, or null when there is no
 *   entry or the entry had expired (in which case it is also removed).
 */
function getSession(key) {
  const entry = sessionStore.get(key);
  if (!entry) {
    return null;
  }

  const age = Date.now() - entry.updatedAt;
  const isExpired = age > SESSION_TTL_MS;
  if (!isExpired) {
    return entry;
  }

  sessionStore.delete(key);
  return null;
}

/**
 * Creates or updates the session stored under `key`.
 *
 * A missing session starts from `{ lastAssistantReasoning: null, createdAt: now }`.
 * The fields of `patch` are then layered on top and `updatedAt` is refreshed.
 * The length of the resulting cached reasoning is written to the debug log.
 *
 * @param {string} key - Session key (see getSessionKey).
 * @param {object} patch - Fields to merge into the session.
 * @returns {object} The new session object that was stored.
 */
function upsertSession(key, patch) {
  let base = sessionStore.get(key);
  if (!base) {
    base = { lastAssistantReasoning: null, createdAt: Date.now() };
  }

  const merged = { ...base, ...patch, updatedAt: Date.now() };
  sessionStore.set(key, merged);

  const reasoningLength = (merged.lastAssistantReasoning || "").length;
  fileLog(`[SESSION] saved reasoning len=${reasoningLength}`);
  return merged;
}

/**
 * Prepares a request body for Kimi with thinking enabled and makes sure every
 * past `model` message that contains a `functionCall` also carries a `thought`
 * part.
 *
 * The Google-format history is taken from `parsedBody.request.contents` or,
 * failing that, `parsedBody.contents`. For each `model` entry whose parts hold
 * a `functionCall` but no `{ thought: true }` part, a synthetic thought part is
 * inserted directly before the first `functionCall`. Its text is the reasoning
 * cached for the session, or the placeholder "thinking..." when nothing is
 * cached (first request, expired session, process restart) so that the
 * message is still patched in that case.
 *
 * The parts arrays are patched in place: the returned body is a shallow copy
 * of `parsedBody`, so both share the same `contents`.
 *
 * NOTE(review): the same cached reasoning is inserted into every unpatched
 * historical message, not only the most recent one.
 *
 * @param {object} parsedBody - Parsed request body.
 * @param {*} req - Incoming request, forwarded to getSessionKey.
 * @param {Function} [logFn] - Unused; kept for interface compatibility.
 * @returns {Promise<{body: object, sessionKey: string}>} The body to send
 *   (with `thinking: { type: "enabled", keep: "all" }`) and the session key.
 */
async function buildKimiBody(parsedBody, req, logFn) {
  const sk = getSessionKey(req, parsedBody);
  const cachedReasoning = getSession(sk)?.lastAssistantReasoning;

  const body = {
    ...parsedBody,
    thinking: { type: "enabled", keep: "all" },
  };

  // Find request.contents (Google Vertex format), nested or top-level.
  let contents = null;
  if (Array.isArray(parsedBody?.request?.contents)) {
    contents = parsedBody.request.contents;
  } else if (Array.isArray(parsedBody?.contents)) {
    contents = parsedBody.contents;
  }

  if (!contents) {
    fileLog(`[BUILD] no request.contents found`);
    return { body, sessionKey: sk };
  }

  fileLog(`[BUILD] google contents=${contents.length} hasCachedReasoning=${!!cachedReasoning}`);

  // Without cached reasoning a placeholder is used, so the assistant tool-call
  // message never goes out without a thought part.
  const thoughtText = cachedReasoning || "thinking...";
  let patchedCount = 0;

  contents.forEach((item, i) => {
    if (item?.role !== "model") return;

    // The parts may live directly on the item, under content.parts, or be
    // the content array itself.
    const parts =
      item.parts ||
      item.content?.parts ||
      (Array.isArray(item.content) ? item.content : null);
    if (!Array.isArray(parts)) return;

    const fcIndex = parts.findIndex((p) => p?.functionCall);
    if (fcIndex === -1) return;
    if (parts.some((p) => p?.thought === true)) return;

    // Insert the thought part BEFORE the first functionCall.
    parts.splice(fcIndex, 0, { thought: true, text: thoughtText });
    patchedCount++;
    fileLog(`[BUILD] patched msg[${i}] - added thought before functionCall`);
  });

  fileLog(`[BUILD] patched ${patchedCount} model messages`);

  return { body, sessionKey: sk };
}

/**
 * Inspects one line of the upstream event stream and folds any reasoning text
 * or tool-call marker it carries into `state`.
 *
 * Only `data:` lines with a JSON payload are examined. Both the Google shape
 * (`response.candidates[].content.parts[]` / `candidates[]...`) and the OpenAI
 * shape (`choices[].delta`) are checked. Lines that are not JSON, or whose
 * payload has an unexpected shape, are ignored.
 *
 * @param {string} line - One line of the stream, without its trailing "\n".
 * @param {{reasoning: string, hasToolCalls: boolean}} state - Accumulator, updated in place.
 */
function scanKimiSseLine(line, state) {
  // The space after "data:" is optional in the event-stream format.
  if (!line.startsWith("data:")) return;
  const payload = line.slice(5).trim();
  if (!payload || payload === "[DONE]") return;

  try {
    const event = JSON.parse(payload);

    // Google format
    const candidates = event?.response?.candidates || event?.candidates || [];
    for (const cand of candidates) {
      for (const part of cand?.content?.parts || []) {
        if (part.thought === true && typeof part.text === "string") {
          state.reasoning += part.text;
        } else if (part.functionCall) {
          state.hasToolCalls = true;
        }
      }
    }

    // OpenAI format
    for (const choice of event?.choices || []) {
      const delta = choice.delta || {};
      if (delta.reasoning_content || delta.reasoning) {
        state.reasoning += delta.reasoning_content || delta.reasoning;
      }
      if (delta.tool_calls) {
        state.hasToolCalls = true;
      }
    }
  } catch (e) {
    // Best-effort sniffing only: the line is still forwarded by the caller.
  }
}

/**
 * Streams an upstream response to the client while collecting the model's
 * reasoning text, which is cached on the session once the stream has ended.
 *
 * Every complete line is forwarded as `line + "\n"`. Text remaining after the
 * last newline when the stream ends is forwarded (and scanned) as well, so a
 * final event without a trailing newline is not lost. When the response has
 * no body stream, its text is sent in one piece.
 *
 * NOTE(review): the client always receives status 200, whatever the upstream
 * status was; confirm that callers only pass successful responses.
 *
 * @param {Response} resp - Upstream fetch-style response (`headers.get`, `body`, `text()`).
 * @param {object} res - Node-style response (`writeHead`, `write`, `end`, `writableEnded`).
 * @param {string} sk - Session key under which the reasoning is cached; falsy skips caching.
 * @param {string} model - Unused; kept for interface compatibility.
 * @param {Function} [logFn] - Unused; kept for interface compatibility.
 * @returns {Promise<void>}
 */
async function handleKimiSSE(resp, res, sk, model, logFn) {
  const ctType = resp.headers.get("content-type") || "application/json";
  res.writeHead(200, { "Content-Type": ctType, "Cache-Control": "no-cache", Connection: "keep-alive" });

  if (!resp.body) {
    const txt = await resp.text().catch(() => "");
    res.end(txt);
    return;
  }

  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  const state = { reasoning: "", hasToolCalls: false };
  let pending = "";

  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split("\n");
      // The last element is an incomplete line (or ""); keep it for the next chunk.
      pending = lines.pop();

      for (const line of lines) {
        scanKimiSseLine(line, state);
        res.write(line + "\n");
      }
    }

    // Flush bytes still buffered in the decoder and the unterminated last line.
    pending += decoder.decode();
    if (pending) {
      scanKimiSseLine(pending, state);
      res.write(pending);
    }
    res.end();

    fileLog(`[SSE_EXIT] rcLen=${state.reasoning.length} hasTC=${state.hasToolCalls}`);
    if (sk && state.reasoning) {
      upsertSession(sk, { lastAssistantReasoning: state.reasoning });
    }
  } catch (err) {
    fileLog(`[SSE] error: ${err?.message ?? err}`);
    if (!res.writableEnded) res.end();
    // Best-effort: stop reading from upstream once the relay has failed.
    reader.cancel(err).catch(() => {});
  }
}

module.exports = {
  isKimiModel,
  buildKimiBody,
  handleKimiSSE,
  getSessionKey,
  pruneExpiredSessions: () => {
    const now = Date.now();
    for (const [key, s] of sessionStore) {
      if (now - s.updatedAt > SESSION_TTL_MS) sessionStore.delete(key);
    }
  },
  PRUNE_INTERVAL_MS: 600000
};
```


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix 400 Bad Request (missing reasoning_content) for Kimi 2.6 model with Google Vertex clients (Antigravity) #735

Bug Description

Root Cause

The Solution

Proposed Code Fix

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Fix 400 Bad Request (missing reasoning_content) for Kimi 2.6 model with Google Vertex clients (Antigravity) #735

Description

Bug Description

Root Cause

The Solution

Proposed Code Fix

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions