
Commit 4e808dc

fix (provider/moonshotai): report cached tokens when streaming correctly

1 parent 5f598b7

5 files changed: +181 −1 lines changed

.changeset/ten-pianos-battle.md

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
---
'@ai-sdk/moonshotai': patch
---

fix (provider/moonshotai): report cached tokens when streaming correctly
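
In practice this means cachedInputTokens is now populated on the final usage object when streaming. A minimal sketch of how a caller would observe the fix, assuming the AI SDK's streamText API; the model id and environment variable name are illustrative:

import { streamText } from 'ai';
import { createMoonshotAI } from '@ai-sdk/moonshotai';

// Illustrative setup; the env var name is an assumption.
const moonshot = createMoonshotAI({ apiKey: process.env.MOONSHOT_API_KEY });

const result = streamText({
  model: moonshot.chatModel('kimi-k2.5'),
  prompt: 'Hello',
  onFinish: ({ usage }) => {
    // Before this fix, cachedInputTokens was undefined in streaming mode
    // even when MoonshotAI reported cached_tokens in its usage payload.
    console.log('cached input tokens:', usage.cachedInputTokens);
  },
});

await result.consumeStream();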
packages/moonshotai/src/__fixtures__/moonshot-cached-tokens.chunks.txt

Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"content":", world!"},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":100,"completion_tokens":10,"total_tokens":110,"cached_tokens":80}}
packages/moonshotai/src/__fixtures__/moonshot-text.chunks.txt

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
{"id":"chatcmpl-456","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-456","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-456","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":50,"completion_tokens":5,"total_tokens":55}}
packages/moonshotai/src/moonshotai-chat-language-model.test.ts

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
import { LanguageModelV2Prompt } from '@ai-sdk/provider';
import { convertReadableStreamToArray } from '@ai-sdk/provider-utils/test';
import { createTestServer } from '@ai-sdk/test-server/with-vitest';
import fs from 'node:fs';
import { beforeEach, describe, expect, it } from 'vitest';
import { createMoonshotAI } from './moonshotai-provider';

const TEST_PROMPT: LanguageModelV2Prompt = [
  { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
];

const provider = createMoonshotAI({
  apiKey: 'test-api-key',
});

const server = createTestServer({
  'https://api.moonshot.ai/v1/chat/completions': {},
});

// Reads a fixture of newline-delimited JSON chunks and serves it from the
// test server as an SSE stream terminated by the [DONE] sentinel.
function prepareChunksFixtureResponse(filename: string) {
  const chunks = fs
    .readFileSync(`src/__fixtures__/${filename}.chunks.txt`, 'utf8')
    .split('\n')
    .filter(line => line.trim().length > 0)
    .map(line => `data: ${line}\n\n`);
  chunks.push('data: [DONE]\n\n');

  server.urls['https://api.moonshot.ai/v1/chat/completions'].response = {
    type: 'stream-chunks',
    chunks,
  };
}

describe('MoonshotAIChatLanguageModel', () => {
  describe('doStream', () => {
    describe('cached tokens at top level (MoonshotAI format)', () => {
      beforeEach(() => {
        prepareChunksFixtureResponse('moonshot-cached-tokens');
      });

      it('should extract cachedInputTokens from top-level cached_tokens', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const finishPart = parts.find(part => part.type === 'finish');

        expect(finishPart).toBeDefined();
        expect(finishPart!.type).toBe('finish');
        if (finishPart!.type === 'finish') {
          expect(finishPart!.usage).toEqual({
            inputTokens: 100,
            outputTokens: 10,
            totalTokens: 110,
            reasoningTokens: undefined,
            cachedInputTokens: 80,
          });
        }
      });

      it('should not emit raw chunks when not requested', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const rawParts = parts.filter(part => part.type === 'raw');

        expect(rawParts).toHaveLength(0);
      });

      it('should emit raw chunks when includeRawChunks is true', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
          includeRawChunks: true,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const rawParts = parts.filter(part => part.type === 'raw');

        expect(rawParts.length).toBeGreaterThan(0);
      });
    });

    describe('without cached tokens', () => {
      beforeEach(() => {
        prepareChunksFixtureResponse('moonshot-text');
      });

      it('should handle usage without cached_tokens', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const finishPart = parts.find(part => part.type === 'finish');

        expect(finishPart).toBeDefined();
        expect(finishPart!.type).toBe('finish');
        if (finishPart!.type === 'finish') {
          expect(finishPart!.usage).toEqual({
            inputTokens: 50,
            outputTokens: 5,
            totalTokens: 55,
            reasoningTokens: undefined,
            cachedInputTokens: undefined,
          });
        }
      });
    });
  });
});

packages/moonshotai/src/moonshotai-chat-language-model.ts

Lines changed: 56 additions & 1 deletion
@@ -1,6 +1,6 @@
 import { OpenAICompatibleChatLanguageModel } from '@ai-sdk/openai-compatible';
 import { OpenAICompatibleChatConfig } from '@ai-sdk/openai-compatible/internal';
-import { LanguageModelV2 } from '@ai-sdk/provider';
+import { LanguageModelV2, LanguageModelV2StreamPart } from '@ai-sdk/provider';
 import { convertMoonshotAIChatUsage } from './convert-moonshotai-chat-usage';
 import { MoonshotAIChatModelId } from './moonshotai-chat-options';

@@ -25,4 +25,59 @@ export class MoonshotAIChatLanguageModel extends OpenAICompatibleChatLanguageModel
       usage: convertMoonshotAIChatUsage(usage),
     };
   }
+
+  async doStream(
+    options: Parameters<LanguageModelV2['doStream']>[0],
+  ): Promise<Awaited<ReturnType<LanguageModelV2['doStream']>>> {
+    const originalIncludeRawChunks = options.includeRawChunks;
+
+    // Enable raw chunks to capture pre-Zod usage data, since MoonshotAI
+    // returns cached_tokens at the top level of usage (not nested in
+    // prompt_tokens_details) and the parent's z.object() schema strips it.
+    const result = await super.doStream({
+      ...options,
+      includeRawChunks: true,
+    });
+
+    let rawUsage: unknown = undefined;
+
+    return {
+      ...result,
+      stream: result.stream.pipeThrough(
+        new TransformStream<
+          LanguageModelV2StreamPart,
+          LanguageModelV2StreamPart
+        >({
+          transform(chunk, controller) {
+            if (chunk.type === 'raw') {
+              // Capture raw usage data before Zod strips cached_tokens
+              const rawValue = chunk.rawValue as Record<string, unknown>;
+              if (rawValue?.usage != null) {
+                rawUsage = rawValue.usage;
+              }
+
+              // Only forward raw chunks if originally requested
+              if (originalIncludeRawChunks) {
+                controller.enqueue(chunk);
+              }
+              return;
+            }
+
+            if (chunk.type === 'finish') {
+              // Re-convert usage from raw data to capture cached_tokens
+              controller.enqueue({
+                ...chunk,
+                usage: rawUsage
+                  ? convertMoonshotAIChatUsage(rawUsage as any)
+                  : chunk.usage,
+              });
+              return;
+            }
+
+            controller.enqueue(chunk);
+          },
+        }),
+      ),
+    };
+  }
 }
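
The override leans on convertMoonshotAIChatUsage from ./convert-moonshotai-chat-usage, which this commit does not touch. Below is a hedged sketch of what that converter plausibly does, inferred from the test expectations above; the actual implementation may differ:

import { LanguageModelV2Usage } from '@ai-sdk/provider';

// Sketch only: the field mapping is inferred from the tests, not taken
// from the real convert-moonshotai-chat-usage.ts source.
export function convertMoonshotAIChatUsage(usage: {
  prompt_tokens?: number;
  completion_tokens?: number;
  total_tokens?: number;
  cached_tokens?: number;
}): LanguageModelV2Usage {
  return {
    inputTokens: usage.prompt_tokens,
    outputTokens: usage.completion_tokens,
    totalTokens: usage.total_tokens,
    reasoningTokens: undefined,
    cachedInputTokens: usage.cached_tokens,
  };
}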
