diff --git a/README.md b/README.md
index 0d392dffd..5be13ab42 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ Or download our self-hosted PDF version of the paper [here](https://byterover.de
 - 🖥️ Interactive TUI with REPL interface (React/Ink)
 - 🧠 Context tree and knowledge storage management
 - 🔀 Git-like version control for the context tree (branch, commit, merge, push/pull)
-- 🤖 18 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, and more)
+- 🤖 20 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, DeepSeek, and more)
 - 🛠️ 24 built-in agent tools (code exec, file ops, knowledge search, memory management)
 - 🔄 Cloud sync with push/pull
 - 👀 Review workflow for curate operations (approve/reject pending changes)
@@ -220,7 +220,7 @@ Run `brv --help` for the full command reference.

Supported LLM Providers

-ByteRover CLI supports 18 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.
+ByteRover CLI supports 20 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.
 
 | Provider | Description |
 |----------|-------------|
@@ -233,6 +233,7 @@ ByteRover CLI supports 18 LLM providers out of the box. Connect and switch provi
 | Cerebras | Fast inference |
 | Cohere | Command models |
 | DeepInfra | Open-source model hosting |
+| DeepSeek | DeepSeek V3 and R1 reasoning models |
 | OpenRouter | Multi-provider gateway |
 | Perplexity | Search-augmented models |
 | TogetherAI | Open-source model hosting |
@@ -240,6 +241,7 @@ ByteRover CLI supports 18 LLM providers out of the box. Connect and switch provi
 | Minimax | Minimax models |
 | Moonshot | Kimi models |
 | GLM | GLM models |
+| GLM Coding Plan | GLM models on Z.AI Coding Plan subscription |
 | OpenAI-Compatible | Any OpenAI-compatible API |
 | ByteRover | ByteRover's hosted models |

diff --git a/src/agent/core/interfaces/i-content-generator.ts b/src/agent/core/interfaces/i-content-generator.ts
index 8d49ca6a2..3fd0ba4b0 100644
--- a/src/agent/core/interfaces/i-content-generator.ts
+++ b/src/agent/core/interfaces/i-content-generator.ts
@@ -70,6 +70,14 @@ export interface GenerateContentResponse {
   finishReason: 'error' | 'max_tokens' | 'stop' | 'tool_calls'
   /** Raw response from provider (for debugging) */
   rawResponse?: unknown
+  /**
+   * Reasoning / thinking text emitted by the model (e.g. DeepSeek-R1's
+   * `reasoning_content`, OpenAI o1's reasoning summary). Required to be
+   * passed back to the API on the next turn for some providers — DeepSeek-R1
+   * rejects the next call with "The reasoning_content in the thinking mode
+   * must be passed back to the API" if absent.
+   */
+  reasoning?: string
   /** Tool calls requested by the model */
   toolCalls?: ToolCall[]
   /** Token usage statistics */
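For orientation, this is roughly what a DeepSeek-R1 turn looks like once the new field is populated. A minimal sketch: the literal values are invented, and only the shape follows the `GenerateContentResponse` interface patched above.

```ts
// Sketch of a DeepSeek-R1 turn carrying the new `reasoning` field.
// Values are illustrative; the shape mirrors GenerateContentResponse.
const r1Turn = {
  content: 'The answer is 42.',
  finishReason: 'stop' as const,
  // Thinking trace surfaced from DeepSeek's `reasoning_content`. Persist it:
  // the next request fails with "The reasoning_content in the thinking mode
  // must be passed back to the API" if the history omits it.
  reasoning: 'The user wants the classic joke answer, so...',
  usage: {completionTokens: 9, promptTokens: 21, totalTokens: 30},
}
```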
diff --git a/src/agent/infra/llm/agent-llm-service.ts b/src/agent/infra/llm/agent-llm-service.ts
index 92fdeb98b..80bfaf78f 100644
--- a/src/agent/infra/llm/agent-llm-service.ts
+++ b/src/agent/infra/llm/agent-llm-service.ts
@@ -571,9 +571,13 @@ export class AgentLLMService implements ILLMService {
     try {
       const response = await this.generator.generateContent(request)
 
-      // Convert response to InternalMessage format
+      // Convert response to InternalMessage format. The reasoning field must
+      // round-trip on the next turn for some providers (e.g. DeepSeek-R1
+      // rejects with "reasoning_content must be passed back to the API"
+      // otherwise).
       const message: InternalMessage = {
         content: response.content,
+        ...(response.reasoning && {reasoning: response.reasoning}),
         role: 'assistant',
         toolCalls: response.toolCalls,
       }
@@ -616,12 +620,16 @@ export class AgentLLMService implements ILLMService {
   ): Promise {
     try {
       let accumulatedContent = ''
+      let accumulatedReasoning = ''
       let accumulatedToolCalls: ToolCall[] = []
 
       // Stream chunks and accumulate content
       for await (const chunk of this.generator.generateContentStream(request)) {
-        // Emit thinking/reasoning chunks as events for TUI display
+        // Emit thinking/reasoning chunks as events for TUI display + accumulate
+        // for the InternalMessage so it round-trips on the next turn (DeepSeek-R1
+        // requires reasoning_content to be passed back).
         if (chunk.type === StreamChunkType.THINKING && chunk.reasoning) {
+          accumulatedReasoning += chunk.reasoning
           this.sessionEventBus.emit('llmservice:chunk', {
             content: chunk.reasoning,
             isComplete: chunk.isComplete,
@@ -652,6 +660,7 @@ export class AgentLLMService implements ILLMService {
       // Convert accumulated response to InternalMessage format
       const message: InternalMessage = {
         content: accumulatedContent || null,
+        ...(accumulatedReasoning && {reasoning: accumulatedReasoning}),
         role: 'assistant',
         toolCalls: accumulatedToolCalls.length > 0 ? accumulatedToolCalls : undefined,
       }
@@ -1281,8 +1290,10 @@ export class AgentLLMService implements ILLMService {
         taskId: taskId || undefined,
       })
 
-      // Add assistant message to context
-      await this.contextManager.addAssistantMessage(content)
+      // Add assistant message to context. Pass reasoning so it round-trips to
+      // providers that demand it (DeepSeek-R1 rejects with "reasoning_content
+      // must be passed back to the API" otherwise).
+      await this.contextManager.addAssistantMessage(content, undefined, lastMessage.reasoning)
 
       return content
     }
@@ -1427,9 +1438,10 @@ export class AgentLLMService implements ILLMService {
     // Emit thought events if present
     this.handleThoughts(lastMessage, taskId)
 
-    // Has tool calls - add assistant message with tool calls
+    // Has tool calls - add assistant message with tool calls. Pass reasoning
+    // so it round-trips to providers that demand it.
     const assistantContent = this.extractTextContent(lastMessage)
-    await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls)
+    await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls, lastMessage.reasoning)
 
     // Step 1: Create pending tool parts for all tool calls
     for (const toolCall of lastMessage.toolCalls) {

diff --git a/src/agent/infra/llm/context/context-manager.ts b/src/agent/infra/llm/context/context-manager.ts
index 69598557a..ac2eb0da7 100644
--- a/src/agent/infra/llm/context/context-manager.ts
+++ b/src/agent/infra/llm/context/context-manager.ts
@@ -185,10 +185,18 @@ export class ContextManager {
    *
    * @param content - Message content (text or null if only tool calls)
    * @param toolCalls - Optional tool calls made by the assistant
+   * @param reasoning - Optional reasoning/thinking trace from the model.
+   *   Required to round-trip for providers like DeepSeek-R1 that reject
+   *   the next turn unless reasoning_content is replayed.
    */
-  public async addAssistantMessage(content: null | string, toolCalls?: InternalMessage['toolCalls']): Promise<void> {
+  public async addAssistantMessage(
+    content: null | string,
+    toolCalls?: InternalMessage['toolCalls'],
+    reasoning?: string,
+  ): Promise<void> {
     const message: InternalMessage = {
       content,
+      ...(reasoning && {reasoning}),
       role: 'assistant',
       toolCalls,
     }
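A condensed view of the new call shape. `contextManager` is declared as a stand-in here and the string values are invented; the three-argument signature is the one added above.

```ts
// Stand-in declaration, narrowed to the patched addAssistantMessage signature.
declare const contextManager: {
  addAssistantMessage(content: null | string, toolCalls?: unknown[], reasoning?: string): Promise<void>
}

// Before this change the trace was dropped, so the replayed history had no
// reasoning part and DeepSeek-R1 rejected the follow-up request.
await contextManager.addAssistantMessage(
  'The answer is 42.',
  undefined, // no tool calls on this turn
  'The user wants the classic joke answer, so...', // reasoning from the last response
)
```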
diff --git a/src/agent/infra/llm/generators/ai-sdk-content-generator.ts b/src/agent/infra/llm/generators/ai-sdk-content-generator.ts
index 1d2e66d03..13617c47e 100644
--- a/src/agent/infra/llm/generators/ai-sdk-content-generator.ts
+++ b/src/agent/infra/llm/generators/ai-sdk-content-generator.ts
@@ -111,6 +111,7 @@ export class AiSdkContentGenerator implements IContentGenerator {
       content: result.text,
       finishReason: mapFinishReason(result.finishReason, toolCalls.length > 0),
       rawResponse: result.response,
+      ...(result.reasoningText && {reasoning: result.reasoningText}),
       toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
       usage: {
         completionTokens: result.usage.outputTokens ?? 0,

diff --git a/src/agent/infra/llm/generators/ai-sdk-message-converter.ts b/src/agent/infra/llm/generators/ai-sdk-message-converter.ts
index cd61a8d95..a0236e1d0 100644
--- a/src/agent/infra/llm/generators/ai-sdk-message-converter.ts
+++ b/src/agent/infra/llm/generators/ai-sdk-message-converter.ts
@@ -170,28 +170,42 @@ function convertUserMessage(msg: InternalMessage): ModelMessage | undefined {
 /**
  * Convert an internal assistant message to AI SDK format.
- * Handles text content and tool calls.
+ * Handles reasoning, text content, and tool calls.
+ *
+ * The reasoning part is required when the message is replayed to providers
+ * that demand the previous turn's thinking trace round-trip back — DeepSeek-R1
+ * rejects requests with "The reasoning_content in the thinking mode must be
+ * passed back to the API" if the assistant message in history lacks the
+ * reasoning that was emitted on the prior turn.
  */
 function convertAssistantMessage(msg: InternalMessage): ModelMessage | undefined {
   const textContent = extractTextContent(msg)
   const hasToolCalls = msg.toolCalls && msg.toolCalls.length > 0
+  const hasReasoning = Boolean(msg.reasoning)
 
-  if (!textContent && !hasToolCalls) {
+  if (!textContent && !hasToolCalls && !hasReasoning) {
     return undefined
   }
 
-  // Simple text-only case
-  if (textContent && !hasToolCalls) {
+  // Simple text-only case (no reasoning, no tools)
+  if (textContent && !hasToolCalls && !hasReasoning) {
     return {content: textContent, role: 'assistant'}
   }
 
-  // Build mixed content array (text + tool calls)
+  // Build mixed content array (reasoning + text + tool calls)
   type AssistantPart =
     | {input: unknown; providerOptions?: Record<string, Record<string, unknown>>; toolCallId: string; toolName: string; type: 'tool-call'}
+    | {text: string; type: 'reasoning'}
     | {text: string; type: 'text'}
 
   const parts: AssistantPart[] = []
 
+  // Reasoning must come first — providers that consume it expect it at the
+  // start of the assistant turn, before any text/tool-call output.
+  if (msg.reasoning) {
+    parts.push({text: msg.reasoning, type: 'reasoning'})
+  }
+
   if (textContent) {
     parts.push({text: textContent, type: 'text'})
   }
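Concretely, an assistant turn that both thought and called a tool now converts to a parts array with the reasoning entry in front. The values below are illustrative; the ordering matches the unit test added later in this PR.

```ts
// Shape produced by convertAssistantMessage for a reasoning + tool-call turn.
const converted = {
  content: [
    {text: 'I need to look up X', type: 'reasoning' as const}, // always first
    {
      input: {q: 'hello'},
      toolCallId: 'call-1',
      toolName: 'lookup',
      type: 'tool-call' as const,
    },
  ],
  role: 'assistant' as const,
}
```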
diff --git a/src/agent/infra/llm/model-capabilities.ts b/src/agent/infra/llm/model-capabilities.ts
index 7946d9abe..4be5cf59a 100644
--- a/src/agent/infra/llm/model-capabilities.ts
+++ b/src/agent/infra/llm/model-capabilities.ts
@@ -9,7 +9,8 @@
  * - Grok: `reasoning_content` or `reasoning_details` fields
  * - Gemini via OpenRouter: `reasoning_details` array or `thoughts` field
  * - GLM (Zhipu AI): `reasoning_content` field in API response
- * - Claude/DeepSeek/MiniMax: `<think>...</think>` XML tags in content
+ * - DeepSeek (R1/Reasoner): `reasoning_content` field in API response (OpenAI-compatible)
+ * - Claude/MiniMax: `<think>...</think>` XML tags in content
  */
 
 /**
@@ -132,13 +133,14 @@ export function getModelCapabilities(modelId: string): ModelCapabilities {
     }
   }
 
-  // DeepSeek models use think tags
+  // DeepSeek models — reasoning models stream `reasoning_content` natively
+  // (OpenAI-compatible field), not `<think>` tags.
   if (id.includes('deepseek')) {
-    // DeepSeek-R1 and reasoning models
     if (id.includes('r1') || id.includes('reasoner')) {
       return {
         reasoning: true,
-        reasoningFormat: 'think-tags',
+        reasoningField: 'reasoning_content',
+        reasoningFormat: 'native-field',
       }
     }
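The routing change is easiest to see at the call site. A small sketch (import path assumed repo-relative; the expected capability objects are taken from the unit tests added below):

```ts
import {getModelCapabilities} from './src/agent/infra/llm/model-capabilities.js'

// deepseek-reasoner / deepseek-r1 now route to the native-field parser:
// {reasoning: true, reasoningField: 'reasoning_content', reasoningFormat: 'native-field'}
console.log(getModelCapabilities('deepseek-reasoner'))

// deepseek-chat (V3, non-reasoning) is unchanged:
// {reasoning: false, reasoningFormat: 'none'}
console.log(getModelCapabilities('deepseek-chat'))
```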
diff --git a/src/agent/infra/llm/providers/deepseek.ts b/src/agent/infra/llm/providers/deepseek.ts
new file mode 100644
index 000000000..b21700ce4
--- /dev/null
+++ b/src/agent/infra/llm/providers/deepseek.ts
@@ -0,0 +1,40 @@
+/**
+ * DeepSeek Provider Module
+ *
+ * Access to DeepSeek V3 (deepseek-chat) and R1 (deepseek-reasoner) via their
+ * OpenAI-compatible API. The reasoner model streams thinking through the
+ * native `reasoning_content` field rather than `<think>` tags — see
+ * model-capabilities.ts for the parser routing.
+ */
+
+import {createOpenAICompatible} from '@ai-sdk/openai-compatible'
+
+import type {GeneratorFactoryConfig, ProviderModule} from './types.js'
+
+import {AiSdkContentGenerator} from '../generators/ai-sdk-content-generator.js'
+
+export const deepseekProvider: ProviderModule = {
+  apiKeyUrl: 'https://platform.deepseek.com/api_keys',
+  authType: 'api-key',
+  baseUrl: 'https://api.deepseek.com/v1',
+  category: 'other',
+  createGenerator(config: GeneratorFactoryConfig) {
+    const provider = createOpenAICompatible({
+      apiKey: config.apiKey!,
+      baseURL: 'https://api.deepseek.com/v1',
+      name: 'deepseek',
+    })
+
+    return new AiSdkContentGenerator({
+      model: provider.chatModel(config.model),
+    })
+  },
+  defaultModel: 'deepseek-chat',
+  description: 'DeepSeek V3 and R1 reasoning models',
+  envVars: ['DEEPSEEK_API_KEY'],
+  id: 'deepseek',
+  name: 'DeepSeek',
+  priority: 19,
+
+  providerType: 'openai',
+}

diff --git a/src/agent/infra/llm/providers/glm-coding-plan.ts b/src/agent/infra/llm/providers/glm-coding-plan.ts
new file mode 100644
index 000000000..cf8983a52
--- /dev/null
+++ b/src/agent/infra/llm/providers/glm-coding-plan.ts
@@ -0,0 +1,39 @@
+/**
+ * GLM Coding Plan (Z.AI) Provider Module
+ *
+ * Same Z.AI account as the standard `glm` provider but routes through the
+ * coding-plan endpoint so subscription quota is consumed instead of
+ * pay-per-token billing.
+ */
+
+import {createOpenAICompatible} from '@ai-sdk/openai-compatible'
+
+import type {GeneratorFactoryConfig, ProviderModule} from './types.js'
+
+import {AiSdkContentGenerator} from '../generators/ai-sdk-content-generator.js'
+
+export const glmCodingPlanProvider: ProviderModule = {
+  apiKeyUrl: 'https://z.ai/manage-apikey/apikey-list',
+  authType: 'api-key',
+  baseUrl: 'https://api.z.ai/api/coding/paas/v4',
+  category: 'other',
+  createGenerator(config: GeneratorFactoryConfig) {
+    const provider = createOpenAICompatible({
+      apiKey: config.apiKey!,
+      baseURL: 'https://api.z.ai/api/coding/paas/v4',
+      name: 'glm-coding-plan',
+    })
+
+    return new AiSdkContentGenerator({
+      model: provider.chatModel(config.model),
+    })
+  },
+  defaultModel: 'glm-4.7',
+  description: 'GLM models on the Z.AI Coding Plan subscription',
+  envVars: ['ZHIPU_API_KEY'],
+  id: 'glm-coding-plan',
+  name: 'GLM Coding Plan (Z.AI)',
+  priority: 17.5,
+
+  providerType: 'openai',
+}

diff --git a/src/agent/infra/llm/providers/index.ts b/src/agent/infra/llm/providers/index.ts
index 059afb107..d6cdef877 100644
--- a/src/agent/infra/llm/providers/index.ts
+++ b/src/agent/infra/llm/providers/index.ts
@@ -14,6 +14,8 @@ import {byteroverProvider} from './byterover.js'
 import {cerebrasProvider} from './cerebras.js'
 import {cohereProvider} from './cohere.js'
 import {deepinfraProvider} from './deepinfra.js'
+import {deepseekProvider} from './deepseek.js'
+import {glmCodingPlanProvider} from './glm-coding-plan.js'
 import {glmProvider} from './glm.js'
 import {googleProvider} from './google.js'
 import {groqProvider} from './groq.js'
@@ -38,7 +40,9 @@ const PROVIDER_MODULES: Readonly<Record<string, ProviderModule>> = {
   cerebras: cerebrasProvider,
   cohere: cohereProvider,
   deepinfra: deepinfraProvider,
+  deepseek: deepseekProvider,
   glm: glmProvider,
+  'glm-coding-plan': glmCodingPlanProvider,
   google: googleProvider,
   groq: groqProvider,
   minimax: minimaxProvider,

diff --git a/src/server/core/domain/entities/provider-registry.ts b/src/server/core/domain/entities/provider-registry.ts
index b5c1f7b53..aff9fe1d6 100644
--- a/src/server/core/domain/entities/provider-registry.ts
+++ b/src/server/core/domain/entities/provider-registry.ts
@@ -144,6 +144,19 @@ export const PROVIDER_REGISTRY: Readonly> = {
     name: 'DeepInfra',
     priority: 10,
   },
+  deepseek: {
+    apiKeyUrl: 'https://platform.deepseek.com/api_keys',
+    baseUrl: 'https://api.deepseek.com/v1',
+    category: 'other',
+    defaultModel: 'deepseek-chat',
+    description: 'DeepSeek V3 and R1 reasoning models',
+    envVars: ['DEEPSEEK_API_KEY'],
+    headers: {},
+    id: 'deepseek',
+    modelsEndpoint: '/models',
+    name: 'DeepSeek',
+    priority: 19,
+  },
   glm: {
     apiKeyUrl: 'https://open.z.ai',
     baseUrl: 'https://api.z.ai/api/paas/v4',
@@ -157,6 +170,19 @@ export const PROVIDER_REGISTRY: Readonly> = {
     name: 'GLM (Z.AI)',
     priority: 17,
   },
+  'glm-coding-plan': {
+    apiKeyUrl: 'https://z.ai/manage-apikey/apikey-list',
+    baseUrl: 'https://api.z.ai/api/coding/paas/v4',
+    category: 'other',
+    defaultModel: 'glm-4.7',
+    description: 'GLM models on the Z.AI Coding Plan subscription',
+    envVars: ['ZHIPU_API_KEY'],
+    headers: {},
+    id: 'glm-coding-plan',
+    modelsEndpoint: '',
+    name: 'GLM Coding Plan (Z.AI)',
+    priority: 17.5,
+  },
   google: {
     apiKeyUrl: 'https://aistudio.google.com/apikey',
     baseUrl: '',
diff --git a/src/server/infra/http/provider-model-fetcher-registry.ts b/src/server/infra/http/provider-model-fetcher-registry.ts
index eabbb20d1..027e0aee9 100644
--- a/src/server/infra/http/provider-model-fetcher-registry.ts
+++ b/src/server/infra/http/provider-model-fetcher-registry.ts
@@ -63,6 +63,7 @@ export async function getModelFetcher(providerId: string): Promise {

diff --git a/src/server/infra/http/provider-model-fetchers.ts b/src/server/infra/http/provider-model-fetchers.ts
--- a/src/server/infra/http/provider-model-fetchers.ts
+++ b/src/server/infra/http/provider-model-fetchers.ts
-    try {
-      await axios.post(
-        `${this.baseUrl}/chat/completions`,
-        {
-          max_tokens: 1,
-          messages: [{content: 'hi', role: 'user'}],
-          model: this.knownModels[0]?.id ?? 'default',
-        },
-        {
-          headers: {
-            Authorization: `Bearer ${apiKey}`,
-            'Content-Type': 'application/json',
+    // Iterate through known models so a single missing model on a tier (e.g.
+    // GLM Coding Plan doesn't yet serve the latest glm-4.7) doesn't
+    // misclassify a valid key as invalid. We accept the key as soon as ANY
+    // model responds successfully, OR returns a non-auth error like 429/5xx
+    // (which still proves the key passed auth).
+    const candidates = this.knownModels.length > 0 ? this.knownModels : [{id: 'default'}]
+    let lastNonAuthError: unknown
+
+    for (const candidate of candidates) {
+      try {
+        // eslint-disable-next-line no-await-in-loop
+        await axios.post(
+          `${this.baseUrl}/chat/completions`,
+          {
+            max_tokens: 1,
+            messages: [{content: 'hi', role: 'user'}],
+            model: candidate.id,
           },
-          httpAgent: ProxyConfig.getProxyAgent(),
-          httpsAgent: ProxyConfig.getProxyAgent(),
-          proxy: false,
-          timeout: 15_000,
-        },
-      )
-
-      return {isValid: true}
-    } catch (error) {
-      if (isAxiosError(error)) {
-        if (error.response?.status === 401) {
-          return {error: 'Invalid API key', isValid: false}
-        }
+          {
+            headers: {
+              Authorization: `Bearer ${apiKey}`,
+              'Content-Type': 'application/json',
+            },
+            httpAgent: ProxyConfig.getProxyAgent(),
+            httpsAgent: ProxyConfig.getProxyAgent(),
+            proxy: false,
+            timeout: 15_000,
+          },
+        )
 
-        if (error.response?.status === 403) {
-          return {error: 'API key does not have required permissions', isValid: false}
+        return {isValid: true}
+      } catch (error) {
+        if (isAxiosError(error)) {
+          if (error.response?.status === 401) {
+            return {error: 'Invalid API key', isValid: false}
+          }
+
+          if (error.response?.status === 403) {
+            return {error: 'API key does not have required permissions', isValid: false}
+          }
+
+          // 400/404 may mean "model not available on this tier" — try next.
+          if (error.response?.status === 400 || error.response?.status === 404) {
+            lastNonAuthError = error
+            continue
+          }
+
+          // Axios errors that are not 401/403/400/404 (e.g. 429, 5xx, or
+          // network-level errors with no response like ECONNREFUSED) are
+          // treated as "key accepted" — either auth was passed (429/5xx) or
+          // we can't determine otherwise (no response). Optimistic: prefer a
+          // false-positive valid over a false-negative invalid.
+          return {isValid: true}
         }
 
-        // Other errors (429, 400, etc.) mean the key was accepted
-        return {isValid: true}
+        lastNonAuthError = error
       }
+    }
 
-      return {error: error instanceof Error ? error.message : 'Unknown error', isValid: false}
+    // Every candidate model returned 400/404 or a non-axios error and none
+    // gave us a positive auth signal. Treat the key as inconclusive — but
+    // since 401/403 was never observed, surface the last error so the user
+    // can see the real cause (often a model-availability issue, not auth).
+    return {
+      error: lastNonAuthError instanceof Error ? lastNonAuthError.message : 'Validation failed for all known models',
+      isValid: false,
     }
   }
 }
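The retry loop reduces to a small status policy. A condensed sketch of that policy only, not the production method (which also threads proxy agents, timeouts, and the accumulated `lastNonAuthError`):

```ts
type Verdict = 'invalid' | 'try-next-model' | 'valid'

// Policy distilled from validateApiKey above: only 401/403 prove a bad key;
// 400/404 usually mean "model not on this tier", so the next known model is
// tried; anything else (429, 5xx, or no HTTP response at all) means auth
// already succeeded or cannot be judged, so the key is accepted.
function classify(status?: number): Verdict {
  if (status === 401 || status === 403) return 'invalid'
  if (status === 400 || status === 404) return 'try-next-model'
  return 'valid'
}
```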
diff --git a/src/webui/assets/providers/deepseek-provider.svg b/src/webui/assets/providers/deepseek-provider.svg
new file mode 100644
index 000000000..2c92cb6a9
--- /dev/null
+++ b/src/webui/assets/providers/deepseek-provider.svg
@@ -0,0 +1 @@
+DeepSeek icon

diff --git a/src/webui/features/provider/components/provider-flow/provider-icons.ts b/src/webui/features/provider/components/provider-flow/provider-icons.ts
index 652465489..7f5636e02 100644
--- a/src/webui/features/provider/components/provider-flow/provider-icons.ts
+++ b/src/webui/features/provider/components/provider-flow/provider-icons.ts
@@ -3,6 +3,7 @@ import byterover from '../../../../assets/providers/byterover-provider.svg'
 import cerebras from '../../../../assets/providers/cerebras-provider.svg'
 import cohere from '../../../../assets/providers/cohere-provider.svg'
 import deepinfra from '../../../../assets/providers/deepinfra-provider.svg'
+import deepseek from '../../../../assets/providers/deepseek-provider.svg'
 import gemini from '../../../../assets/providers/gemini-provider.svg'
 import groq from '../../../../assets/providers/groq-provider.svg'
 import kimi from '../../../../assets/providers/kimi-provider.svg'
@@ -23,7 +24,9 @@ export const providerIcons: Record = {
   cerebras,
   cohere,
   deepinfra,
+  deepseek,
   glm: zai,
+  'glm-coding-plan': zai,
   google: gemini,
   groq,
   minimax,

diff --git a/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts b/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts
index 23c49f5b8..38e497818 100644
--- a/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts
+++ b/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts
@@ -1,8 +1,9 @@
 import {expect} from 'chai'
 
 import type {ToolSet as InternalToolSet} from '../../../../../src/agent/core/domain/tools/types.js'
+import type {InternalMessage} from '../../../../../src/agent/core/interfaces/message-types.js'
 
-import {toAiSdkTools} from '../../../../../src/agent/infra/llm/generators/ai-sdk-message-converter.js'
+import {toAiSdkTools, toModelMessages} from '../../../../../src/agent/infra/llm/generators/ai-sdk-message-converter.js'
 
 function makeTool(description: string): InternalToolSet[string] {
   return {
@@ -18,6 +19,10 @@ function getProviderOptions(tool: unknown): Record<string, unknown> | undefined
 
 const EPHEMERAL_CACHE_CONTROL = {anthropic: {cacheControl: {type: 'ephemeral'}}}
 
+// File tests two unrelated exports (toAiSdkTools, toModelMessages); each
+// gets its own top-level describe per the reviewer's structural feedback.
+/* eslint-disable mocha/max-top-level-suites */
+
 describe('toAiSdkTools — anthropic cache_control on last tool', () => {
   it('returns undefined when tools is undefined or empty', () => {
     expect(toAiSdkTools()).to.equal(undefined)
@@ -53,3 +58,92 @@ describe('toAiSdkTools — anthropic cache_control on last tool', () => {
     expect(getProviderOptions(result?.middleTool)).to.deep.equal(EPHEMERAL_CACHE_CONTROL)
   })
 })
+
+describe('toModelMessages — reasoning round-trip', () => {
+  // DeepSeek-R1 rejects with "The reasoning_content in the thinking mode
+  // must be passed back to the API" if a prior assistant turn's reasoning
+  // is not present when the conversation history is replayed.
+
+  it('includes a reasoning part on the assistant message when msg.reasoning is set', () => {
+    const messages: InternalMessage[] = [
+      {content: 'hello', role: 'user'},
+      {
+        content: 'final answer',
+        reasoning: 'Let me think... the answer must be X because Y.',
+        role: 'assistant',
+      },
+    ]
+
+    const result = toModelMessages(messages)
+    const assistant = result.find((m) => m.role === 'assistant')
+    expect(assistant).to.exist
+
+    // Assistant content should be a parts array with reasoning ahead of text
+    expect(Array.isArray(assistant?.content)).to.be.true
+    const parts = assistant?.content as Array<{text?: string; type: string}>
+    const types = parts.map((p) => p.type)
+    expect(types).to.include('reasoning')
+    expect(types).to.include('text')
+    expect(types.indexOf('reasoning')).to.be.lessThan(types.indexOf('text'))
+
+    const reasoningPart = parts.find((p) => p.type === 'reasoning')
+    expect(reasoningPart?.text).to.equal('Let me think... the answer must be X because Y.')
+  })
+
+  it('keeps the simple text-only path when reasoning is absent', () => {
+    const messages: InternalMessage[] = [
+      {content: 'plain answer', role: 'assistant'},
+    ]
+
+    const result = toModelMessages(messages)
+    const assistant = result.find((m) => m.role === 'assistant')
+    // Pre-fix behavior preserved: no parts array, just a string
+    expect(assistant?.content).to.equal('plain answer')
+  })
+
+  it('preserves reasoning-before-tool-call ordering when both are present', () => {
+    const messages: InternalMessage[] = [
+      {
+        content: '',
+        reasoning: 'I need to look up X',
+        role: 'assistant',
+        toolCalls: [
+          {
+            function: {arguments: '{"q":"hello"}', name: 'lookup'},
+            id: 'call-1',
+            type: 'function',
+          },
+        ],
+      },
+    ]
+
+    const result = toModelMessages(messages)
+    const assistant = result.find((m) => m.role === 'assistant')
+    const parts = assistant?.content as Array<{type: string}>
+    const types = parts.map((p) => p.type)
+    // reasoning must precede tool-call so providers see it as a coherent turn
+    expect(types[0]).to.equal('reasoning')
+    expect(types).to.include('tool-call')
+  })
+
+  it('returns no message when both content/toolCalls/reasoning are empty', () => {
+    const messages: InternalMessage[] = [
+      {content: '', role: 'assistant'},
+    ]
+
+    const result = toModelMessages(messages)
+    expect(result.find((m) => m.role === 'assistant')).to.equal(undefined)
+  })
+
+  it('emits a message with only a reasoning part when text and toolCalls are absent', () => {
+    const messages: InternalMessage[] = [
+      {content: null, reasoning: 'silent think', role: 'assistant'},
+    ]
+    const result = toModelMessages(messages)
+    const assistant = result.find((m) => m.role === 'assistant')
+    expect(assistant).to.exist
+    const parts = assistant?.content as Array<{type: string}>
+    expect(parts).to.have.length(1)
+    expect(parts[0].type).to.equal('reasoning')
+  })
+})

diff --git a/test/unit/agent/llm/model-capabilities.test.ts b/test/unit/agent/llm/model-capabilities.test.ts
new file mode 100644
index 000000000..fb9095ae3
--- /dev/null
+++ b/test/unit/agent/llm/model-capabilities.test.ts
@@ -0,0 +1,25 @@
+import {expect} from 'chai'
+
+import {getModelCapabilities} from '../../../../src/agent/infra/llm/model-capabilities.js'
+
+describe('getModelCapabilities — DeepSeek', () => {
+  it('reports native reasoning_content for deepseek-reasoner', () => {
+    const caps = getModelCapabilities('deepseek-reasoner')
+    expect(caps.reasoning).to.equal(true)
+    expect(caps.reasoningField).to.equal('reasoning_content')
+    expect(caps.reasoningFormat).to.equal('native-field')
+  })
+
+  it('reports native reasoning_content for deepseek-r1', () => {
+    const caps = getModelCapabilities('deepseek-r1')
+    expect(caps.reasoning).to.equal(true)
+    expect(caps.reasoningField).to.equal('reasoning_content')
+    expect(caps.reasoningFormat).to.equal('native-field')
+  })
+
+  it('reports no reasoning for deepseek-chat', () => {
+    const caps = getModelCapabilities('deepseek-chat')
+    expect(caps.reasoning).to.equal(false)
+    expect(caps.reasoningFormat).to.equal('none')
+  })
+})

diff --git a/test/unit/agent/llm/providers/deepseek.test.ts b/test/unit/agent/llm/providers/deepseek.test.ts
new file mode 100644
index 000000000..53cf7a00d
--- /dev/null
+++ b/test/unit/agent/llm/providers/deepseek.test.ts
@@ -0,0 +1,31 @@
+import {expect} from 'chai'
+
+import {getProviderModule} from '../../../../../src/agent/infra/llm/providers/index.js'
+
+describe('deepseek provider module', () => {
+  const mod = getProviderModule('deepseek')
+
+  it('is registered', () => {
+    expect(mod).to.not.be.undefined
+  })
+
+  it('uses api-key auth', () => {
+    expect(mod?.authType).to.equal('api-key')
+  })
+
+  it('uses the openai provider type for formatter/tokenizer selection', () => {
+    expect(mod?.providerType).to.equal('openai')
+  })
+
+  it('defaults to deepseek-chat', () => {
+    expect(mod?.defaultModel).to.equal('deepseek-chat')
+  })
+
+  it('points at the official DeepSeek API base URL', () => {
+    expect(mod?.baseUrl).to.equal('https://api.deepseek.com/v1')
+  })
+
+  it('exposes DEEPSEEK_API_KEY for env detection', () => {
+    expect(mod?.envVars).to.include('DEEPSEEK_API_KEY')
+  })
+})

diff --git a/test/unit/agent/llm/providers/glm-coding-plan.test.ts b/test/unit/agent/llm/providers/glm-coding-plan.test.ts
new file mode 100644
index 000000000..047b08b68
--- /dev/null
+++ b/test/unit/agent/llm/providers/glm-coding-plan.test.ts
@@ -0,0 +1,37 @@
+import {expect} from 'chai'
+
+import {getProviderModule} from '../../../../../src/agent/infra/llm/providers/index.js'
+
+describe('glm-coding-plan provider module', () => {
+  const mod = getProviderModule('glm-coding-plan')
+
+  it('is registered', () => {
+    expect(mod).to.not.be.undefined
+  })
+
+  it('uses api-key auth', () => {
+    expect(mod?.authType).to.equal('api-key')
+  })
+
+  it('uses the openai provider type for formatter/tokenizer selection', () => {
+    expect(mod?.providerType).to.equal('openai')
+  })
+
+  it('defaults to glm-4.7', () => {
+    expect(mod?.defaultModel).to.equal('glm-4.7')
+  })
+
+  it('points at the Z.AI Coding Plan endpoint', () => {
+    expect(mod?.baseUrl).to.equal('https://api.z.ai/api/coding/paas/v4')
+  })
+
+  it('exposes ZHIPU_API_KEY for env detection', () => {
+    expect(mod?.envVars).to.include('ZHIPU_API_KEY')
+  })
+
+  it('coexists with the standard glm provider', () => {
+    const standard = getProviderModule('glm')
+    expect(standard).to.not.be.undefined
+    expect(standard?.baseUrl).to.not.equal(mod?.baseUrl)
+  })
+})

diff --git a/test/unit/core/domain/entities/provider-registry.test.ts b/test/unit/core/domain/entities/provider-registry.test.ts
index 564442fca..58ebba0cd 100644
--- a/test/unit/core/domain/entities/provider-registry.test.ts
+++ b/test/unit/core/domain/entities/provider-registry.test.ts
@@ -130,4 +130,62 @@ describe('Provider Registry', () => {
       }
     })
   })
+
+  describe('GLM Coding Plan provider', () => {
+    it('should be registered', () => {
+      expect(getProviderById('glm-coding-plan')).to.not.be.undefined
+    })
+
+    it('should point at the Z.AI Coding Plan endpoint', () => {
+      expect(getProviderById('glm-coding-plan')?.baseUrl).to.equal('https://api.z.ai/api/coding/paas/v4')
+    })
+
+    it('should reuse ZHIPU_API_KEY for env detection', () => {
+      expect(getProviderById('glm-coding-plan')?.envVars).to.include('ZHIPU_API_KEY')
+    })
+
+    it('should default to glm-4.7', () => {
+      expect(getProviderById('glm-coding-plan')?.defaultModel).to.equal('glm-4.7')
+    })
+
+    it('should not require OAuth', () => {
+      expect(getProviderById('glm-coding-plan')?.oauth).to.be.undefined
+    })
+
+    it('should require an API key by default', () => {
+      expect(providerRequiresApiKey('glm-coding-plan')).to.be.true
+    })
+
+    it('should coexist with the standard glm provider (no rename)', () => {
+      expect(getProviderById('glm')).to.not.be.undefined
+      expect(getProviderById('glm-coding-plan')).to.not.be.undefined
+      expect(getProviderById('glm')?.baseUrl).to.not.equal(getProviderById('glm-coding-plan')?.baseUrl)
+    })
+  })
+
+  describe('DeepSeek provider', () => {
+    it('should be registered', () => {
+      expect(getProviderById('deepseek')).to.not.be.undefined
+    })
+
+    it('should point at the official OpenAI-compatible API base URL', () => {
+      expect(getProviderById('deepseek')?.baseUrl).to.equal('https://api.deepseek.com/v1')
+    })
+
+    it('should detect DEEPSEEK_API_KEY from the environment', () => {
+      expect(getProviderById('deepseek')?.envVars).to.include('DEEPSEEK_API_KEY')
+    })
+
+    it('should default to deepseek-chat', () => {
+      expect(getProviderById('deepseek')?.defaultModel).to.equal('deepseek-chat')
+    })
+
+    it('should not require OAuth', () => {
+      expect(getProviderById('deepseek')?.oauth).to.be.undefined
+    })
+
+    it('should require an API key by default', () => {
+      expect(providerRequiresApiKey('deepseek')).to.be.true
+    })
+  })
 })

diff --git a/test/unit/infra/http/provider-model-fetchers.test.ts b/test/unit/infra/http/provider-model-fetchers.test.ts
index 22b825407..52ef5131d 100644
--- a/test/unit/infra/http/provider-model-fetchers.test.ts
+++ b/test/unit/infra/http/provider-model-fetchers.test.ts
@@ -1,10 +1,20 @@
+import axios from 'axios'
 import {expect} from 'chai'
-import {type SinonStub, stub} from 'sinon'
+import {restore, type SinonStub, stub} from 'sinon'
 
 import type {ProviderModelInfo} from '../../../../src/server/core/interfaces/i-provider-model-fetcher.js'
 import type {ModelsDevClient} from '../../../../src/server/infra/http/models-dev-client.js'
 
-import {CODEX_FALLBACK_MODELS, OpenAIModelFetcher} from '../../../../src/server/infra/http/provider-model-fetchers.js'
+import {ChatBasedModelFetcher, CODEX_FALLBACK_MODELS, OpenAIModelFetcher} from '../../../../src/server/infra/http/provider-model-fetchers.js'
+
+function makeAxiosErr(status: number): Error {
+  const err = new Error(`HTTP ${status}`)
+  Object.assign(err, {
+    isAxiosError: true,
+    response: {data: {}, status, statusText: ''},
+  })
+  return err
+}
 
 function createMockModelsDevClient(models: ProviderModelInfo[] = []): ModelsDevClient {
   return {
@@ -146,4 +156,103 @@ describe('OpenAIModelFetcher', () => {
     expect((mockClient.getModelsForProvider as SinonStub).calledWith('openai', true)).to.be.true
    })
  })
+
+  describe('ChatBasedModelFetcher.validateApiKey (ENG-2609)', () => {
+    // Stubs `axios.post` directly so we can simulate per-call responses across
+    // the model-iteration loop. Mirrors the GLM Coding Plan failure where
+    // glm-4.7 isn't on the coding-plan tier but glm-4.5 is.
+
+    afterEach(() => {
+      restore()
+    })
+
+    it('returns isValid:true on the first model that succeeds', async () => {
+      const post = stub(axios, 'post').resolves({data: {}, status: 200})
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', ['model-a', 'model-b'])
+
+      const result = await fetcher.validateApiKey('sk-good')
+      expect(result).to.deep.equal({isValid: true})
+      expect(post.callCount).to.equal(1)
+    })
+
+    it('skips a 400 model-not-found and retries with the next model', async () => {
+      const post = stub(axios, 'post')
+      post.onFirstCall().rejects(makeAxiosErr(400))
+      post.onSecondCall().resolves({data: {}, status: 200})
+      const fetcher = new ChatBasedModelFetcher('https://api.z.ai/api/coding/paas/v4', 'GLM Coding Plan', ['glm-4.7', 'glm-4.5'])
+
+      const result = await fetcher.validateApiKey('sk-good')
+      expect(result).to.deep.equal({isValid: true})
+      expect(post.callCount).to.equal(2)
+    })
+
+    it('skips 404 model-not-found and retries', async () => {
+      const post = stub(axios, 'post')
+      post.onFirstCall().rejects(makeAxiosErr(404))
+      post.onSecondCall().resolves({data: {}, status: 200})
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', ['a', 'b'])
+
+      const result = await fetcher.validateApiKey('sk-good')
+      expect(result.isValid).to.equal(true)
+      expect(post.callCount).to.equal(2)
+    })
+
+    it('returns isValid:false on 401 even if later models would have worked', async () => {
+      const post = stub(axios, 'post').rejects(makeAxiosErr(401))
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', ['a', 'b'])
+
+      const result = await fetcher.validateApiKey('sk-bad')
+      expect(result.error).to.equal('Invalid API key')
+      expect(result.isValid).to.equal(false)
+      expect(post.callCount).to.equal(1) // short-circuits, doesn't try b
+    })
+
+    it('returns isValid:false on 403', async () => {
+      stub(axios, 'post').rejects(makeAxiosErr(403))
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', ['a'])
+
+      const result = await fetcher.validateApiKey('sk-no-perm')
+      expect(result.error).to.equal('API key does not have required permissions')
+      expect(result.isValid).to.equal(false)
+    })
+
+    it('treats 429 (rate limit) as key-accepted', async () => {
+      stub(axios, 'post').rejects(makeAxiosErr(429))
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', ['a'])
+
+      const result = await fetcher.validateApiKey('sk-rate-limited')
+      expect(result).to.deep.equal({isValid: true})
+    })
+
+    it('treats 5xx as key-accepted (server-side issue, not auth)', async () => {
+      stub(axios, 'post').rejects(makeAxiosErr(503))
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', ['a'])
+
+      const result = await fetcher.validateApiKey('sk-good')
+      expect(result.isValid).to.equal(true)
+    })
+
+    it('returns isValid:false with last error if all models 400/404', async () => {
+      const post = stub(axios, 'post')
+      post.onFirstCall().rejects(makeAxiosErr(400))
+      post.onSecondCall().rejects(makeAxiosErr(404))
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', ['a', 'b'])
+
+      const result = await fetcher.validateApiKey('sk-???')
+      expect(result.isValid).to.equal(false)
+      expect(post.callCount).to.equal(2)
+    })
+
+    it('falls back to default model when knownModels is empty', async () => {
+      const post = stub(axios, 'post').resolves({data: {}, status: 200})
+      const fetcher = new ChatBasedModelFetcher('https://api.example.com/v1', 'X', [])
+
+      const result = await fetcher.validateApiKey('sk-good')
+      expect(result).to.deep.equal({isValid: true})
+      expect(post.calledOnce).to.be.true
+      // Should have used 'default' as the fallback model id
+      const body = post.firstCall.args[1] as {model: string}
+      expect(body.model).to.equal('default')
+    })
+  })
 })