
Commit 4e808dc

fix (provider/moonshotai): report cached tokens when streaming correctly

1 parent 5f598b7

5 files changed: +181 −1 lines changed

.changeset/ten-pianos-battle.md

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
---
'@ai-sdk/moonshotai': patch
---

fix (provider/moonshotai): report cached tokens when streaming correctly
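
In practice this means cachedInputTokens is now populated on the final usage object when streaming. A minimal sketch of how a caller would observe the fix, assuming the AI SDK's streamText API; the model id and environment variable name are illustrative:

import { streamText } from 'ai';
import { createMoonshotAI } from '@ai-sdk/moonshotai';

// Illustrative setup; the env var name is an assumption.
const moonshot = createMoonshotAI({ apiKey: process.env.MOONSHOT_API_KEY });

const result = streamText({
  model: moonshot.chatModel('kimi-k2.5'),
  prompt: 'Hello',
  onFinish: ({ usage }) => {
    // Before this fix, cachedInputTokens was undefined in streaming mode
    // even when MoonshotAI reported cached_tokens in its usage payload.
    console.log('cached input tokens:', usage.cachedInputTokens);
  },
});

await result.consumeStream();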
packages/moonshotai/src/__fixtures__/moonshot-cached-tokens.chunks.txt

Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"content":", world!"},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":100,"completion_tokens":10,"total_tokens":110,"cached_tokens":80}}
packages/moonshotai/src/__fixtures__/moonshot-text.chunks.txt

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
{"id":"chatcmpl-456","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-456","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}],"usage":null}
{"id":"chatcmpl-456","object":"chat.completion.chunk","created":1234567890,"model":"kimi-k2.5","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":50,"completion_tokens":5,"total_tokens":55}}
packages/moonshotai/src/moonshotai-chat-language-model.test.ts

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
import { LanguageModelV2Prompt } from '@ai-sdk/provider';
import { convertReadableStreamToArray } from '@ai-sdk/provider-utils/test';
import { createTestServer } from '@ai-sdk/test-server/with-vitest';
import fs from 'node:fs';
import { beforeEach, describe, expect, it } from 'vitest';
import { createMoonshotAI } from './moonshotai-provider';

const TEST_PROMPT: LanguageModelV2Prompt = [
  { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
];

const provider = createMoonshotAI({
  apiKey: 'test-api-key',
});

const server = createTestServer({
  'https://api.moonshot.ai/v1/chat/completions': {},
});

// Reads a fixture of newline-delimited JSON chunks and serves it from the
// test server as an SSE stream terminated by the [DONE] sentinel.
function prepareChunksFixtureResponse(filename: string) {
  const chunks = fs
    .readFileSync(`src/__fixtures__/${filename}.chunks.txt`, 'utf8')
    .split('\n')
    .filter(line => line.trim().length > 0)
    .map(line => `data: ${line}\n\n`);
  chunks.push('data: [DONE]\n\n');

  server.urls['https://api.moonshot.ai/v1/chat/completions'].response = {
    type: 'stream-chunks',
    chunks,
  };
}

describe('MoonshotAIChatLanguageModel', () => {
  describe('doStream', () => {
    describe('cached tokens at top level (MoonshotAI format)', () => {
      beforeEach(() => {
        prepareChunksFixtureResponse('moonshot-cached-tokens');
      });

      it('should extract cachedInputTokens from top-level cached_tokens', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const finishPart = parts.find(part => part.type === 'finish');

        expect(finishPart).toBeDefined();
        expect(finishPart!.type).toBe('finish');
        if (finishPart!.type === 'finish') {
          expect(finishPart!.usage).toEqual({
            inputTokens: 100,
            outputTokens: 10,
            totalTokens: 110,
            reasoningTokens: undefined,
            cachedInputTokens: 80,
          });
        }
      });

      it('should not emit raw chunks when not requested', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const rawParts = parts.filter(part => part.type === 'raw');

        expect(rawParts).toHaveLength(0);
      });

      it('should emit raw chunks when includeRawChunks is true', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
          includeRawChunks: true,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const rawParts = parts.filter(part => part.type === 'raw');

        expect(rawParts.length).toBeGreaterThan(0);
      });
    });

    describe('without cached tokens', () => {
      beforeEach(() => {
        prepareChunksFixtureResponse('moonshot-text');
      });

      it('should handle usage without cached_tokens', async () => {
        const result = await provider.chatModel('kimi-k2.5').doStream({
          prompt: TEST_PROMPT,
        });

        const parts = await convertReadableStreamToArray(result.stream);
        const finishPart = parts.find(part => part.type === 'finish');

        expect(finishPart).toBeDefined();
        expect(finishPart!.type).toBe('finish');
        if (finishPart!.type === 'finish') {
          expect(finishPart!.usage).toEqual({
            inputTokens: 50,
            outputTokens: 5,
            totalTokens: 55,
            reasoningTokens: undefined,
            cachedInputTokens: undefined,
          });
        }
      });
    });
  });
});

packages/moonshotai/src/moonshotai-chat-language-model.ts

Lines changed: 56 additions & 1 deletion
@@ -1,6 +1,6 @@
 import { OpenAICompatibleChatLanguageModel } from '@ai-sdk/openai-compatible';
 import { OpenAICompatibleChatConfig } from '@ai-sdk/openai-compatible/internal';
-import { LanguageModelV2 } from '@ai-sdk/provider';
+import { LanguageModelV2, LanguageModelV2StreamPart } from '@ai-sdk/provider';
 import { convertMoonshotAIChatUsage } from './convert-moonshotai-chat-usage';
 import { MoonshotAIChatModelId } from './moonshotai-chat-options';

@@ -25,4 +25,59 @@ export class MoonshotAIChatLanguageModel extends OpenAICompatibleChatLanguageModel
       usage: convertMoonshotAIChatUsage(usage),
     };
   }
+
+  async doStream(
+    options: Parameters<LanguageModelV2['doStream']>[0],
+  ): Promise<Awaited<ReturnType<LanguageModelV2['doStream']>>> {
+    const originalIncludeRawChunks = options.includeRawChunks;
+
+    // Enable raw chunks to capture pre-Zod usage data, since MoonshotAI
+    // returns cached_tokens at the top level of usage (not nested in
+    // prompt_tokens_details) and the parent's z.object() schema strips it.
+    const result = await super.doStream({
+      ...options,
+      includeRawChunks: true,
+    });
+
+    let rawUsage: unknown = undefined;
+
+    return {
+      ...result,
+      stream: result.stream.pipeThrough(
+        new TransformStream<
+          LanguageModelV2StreamPart,
+          LanguageModelV2StreamPart
+        >({
+          transform(chunk, controller) {
+            if (chunk.type === 'raw') {
+              // Capture raw usage data before Zod strips cached_tokens
+              const rawValue = chunk.rawValue as Record<string, unknown>;
+              if (rawValue?.usage != null) {
+                rawUsage = rawValue.usage;
+              }
+
+              // Only forward raw chunks if originally requested
+              if (originalIncludeRawChunks) {
+                controller.enqueue(chunk);
+              }
+              return;
+            }
+
+            if (chunk.type === 'finish') {
+              // Re-convert usage from raw data to capture cached_tokens
+              controller.enqueue({
+                ...chunk,
+                usage: rawUsage
+                  ? convertMoonshotAIChatUsage(rawUsage as any)
+                  : chunk.usage,
+              });
+              return;
+            }
+
+            controller.enqueue(chunk);
+          },
+        }),
+      ),
+    };
+  }
 }
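
The override leans on convertMoonshotAIChatUsage from ./convert-moonshotai-chat-usage, which this commit does not touch. Below is a hedged sketch of what that converter plausibly does, inferred from the test expectations above; the actual implementation may differ:

import { LanguageModelV2Usage } from '@ai-sdk/provider';

// Sketch only: the field mapping is inferred from the tests, not taken
// from the real convert-moonshotai-chat-usage.ts source.
export function convertMoonshotAIChatUsage(usage: {
  prompt_tokens?: number;
  completion_tokens?: number;
  total_tokens?: number;
  cached_tokens?: number;
}): LanguageModelV2Usage {
  return {
    inputTokens: usage.prompt_tokens,
    outputTokens: usage.completion_tokens,
    totalTokens: usage.total_tokens,
    reasoningTokens: undefined,
    cachedInputTokens: usage.cached_tokens,
  };
}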
