6 changes: 4 additions & 2 deletions README.md
@@ -34,7 +34,7 @@ Or download our self-hosted PDF version of the paper [here](https://byterover.de
- 🖥️ Interactive TUI with REPL interface (React/Ink)
- 🧠 Context tree and knowledge storage management
- 🔀 Git-like version control for the context tree (branch, commit, merge, push/pull)
- 🤖 18 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, and more)
- 🤖 20 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, DeepSeek, and more)
- 🛠️ 24 built-in agent tools (code exec, file ops, knowledge search, memory management)
- 🔄 Cloud sync with push/pull
- 👀 Review workflow for curate operations (approve/reject pending changes)
@@ -220,7 +220,7 @@ Run `brv --help` for the full command reference.
<details>
<summary><h2>Supported LLM Providers</h2></summary>

ByteRover CLI supports 18 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.
ByteRover CLI supports 20 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.

| Provider | Description |
|----------|-------------|
@@ -233,13 +233,15 @@ ByteRover CLI supports 18 LLM providers out of the box. Connect and switch provi
| Cerebras | Fast inference |
| Cohere | Command models |
| DeepInfra | Open-source model hosting |
| DeepSeek | DeepSeek V3 and R1 reasoning models |
| OpenRouter | Multi-provider gateway |
| Perplexity | Search-augmented models |
| TogetherAI | Open-source model hosting |
| Vercel | AI SDK provider |
| Minimax | Minimax models |
| Moonshot | Kimi models |
| GLM | GLM models |
| GLM Coding Plan | GLM models on Z.AI Coding Plan subscription |
| OpenAI-Compatible | Any OpenAI-compatible API |
| ByteRover | ByteRover's hosted models |

8 changes: 8 additions & 0 deletions src/agent/core/interfaces/i-content-generator.ts
@@ -70,6 +70,14 @@ export interface GenerateContentResponse {
finishReason: 'error' | 'max_tokens' | 'stop' | 'tool_calls'
/** Raw response from provider (for debugging) */
rawResponse?: unknown
/**
* Reasoning / thinking text emitted by the model (e.g. DeepSeek-R1's
* `reasoning_content`, OpenAI o1's reasoning summary). Required to be
* passed back to the API on the next turn for some providers — DeepSeek-R1
* rejects the next call with "The reasoning_content in the thinking mode
* must be passed back to the API" if absent.
*/
reasoning?: string
/** Tool calls requested by the model */
toolCalls?: ToolCall[]
/** Token usage statistics */
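A minimal sketch of the contract this field creates, with simplified stand-in types rather than the real interfaces from this file:

```ts
// Why `reasoning` must survive into the next request's history.
// `AssistantTurn` is a simplified stand-in for InternalMessage.
interface AssistantTurn {
  content: null | string
  reasoning?: string
  role: 'assistant'
}

function toHistory(response: {content: null | string; reasoning?: string}): AssistantTurn {
  return {
    content: response.content,
    // Dropping this spread reproduces the DeepSeek-R1 failure quoted above:
    // "The reasoning_content in the thinking mode must be passed back to the API"
    ...(response.reasoning && {reasoning: response.reasoning}),
    role: 'assistant',
  }
}
```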
24 changes: 18 additions & 6 deletions src/agent/infra/llm/agent-llm-service.ts
@@ -210,7 +210,7 @@
*
* @param sessionId - Unique identifier for this session
* @param generator - Content generator for LLM calls (with decorators pre-applied)
* @param config - LLM service configuration (model, tokens, temperature)

⚠️ GitHub Actions / lint warning (line 213): Missing @param "options.compressionStrategies"
* @param options - Service dependencies
* @param options.toolManager - Tool manager for executing agent tools
* @param options.systemPromptManager - System prompt manager for building system prompts
@@ -571,9 +571,13 @@
try {
const response = await this.generator.generateContent(request)

// Convert response to InternalMessage format
// Convert response to InternalMessage format. The reasoning field must
// round-trip on the next turn for some providers (e.g. DeepSeek-R1
// rejects with "reasoning_content must be passed back to the API"
// otherwise).
const message: InternalMessage = {
content: response.content,
...(response.reasoning && {reasoning: response.reasoning}),
role: 'assistant',
toolCalls: response.toolCalls,
}
@@ -616,12 +620,16 @@
): Promise<InternalMessage> {
try {
let accumulatedContent = ''
let accumulatedReasoning = ''
let accumulatedToolCalls: ToolCall[] = []

// Stream chunks and accumulate content
for await (const chunk of this.generator.generateContentStream(request)) {
// Emit thinking/reasoning chunks as events for TUI display
// Emit thinking/reasoning chunks as events for TUI display, and accumulate
// them for the InternalMessage so the reasoning round-trips on the next
// turn (DeepSeek-R1 requires reasoning_content to be passed back).
if (chunk.type === StreamChunkType.THINKING && chunk.reasoning) {
accumulatedReasoning += chunk.reasoning
this.sessionEventBus.emit('llmservice:chunk', {
content: chunk.reasoning,
isComplete: chunk.isComplete,
@@ -652,6 +660,7 @@
// Convert accumulated response to InternalMessage format
const message: InternalMessage = {
content: accumulatedContent || null,
...(accumulatedReasoning && {reasoning: accumulatedReasoning}),
role: 'assistant',
toolCalls: accumulatedToolCalls.length > 0 ? accumulatedToolCalls : undefined,
}
@@ -1281,8 +1290,10 @@
taskId: taskId || undefined,
})

// Add assistant message to context
await this.contextManager.addAssistantMessage(content)
// Add assistant message to context. Pass reasoning so it round-trips to
// providers that demand it (DeepSeek-R1 rejects with "reasoning_content
// must be passed back to the API" otherwise).
await this.contextManager.addAssistantMessage(content, undefined, lastMessage.reasoning)

return content
}
@@ -1427,9 +1438,10 @@
// Emit thought events if present
this.handleThoughts(lastMessage, taskId)

// Has tool calls - add assistant message with tool calls
// Has tool calls - add assistant message with tool calls. Pass reasoning
// so it round-trips to providers that demand it.
const assistantContent = this.extractTextContent(lastMessage)
await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls)
await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls, lastMessage.reasoning)

// Step 1: Create pending tool parts for all tool calls
for (const toolCall of lastMessage.toolCalls) {
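The streaming path above condenses to this accumulation pattern; the chunk shape here is a simplified sketch, not the real stream chunk type:

```ts
// Accumulate THINKING chunks alongside text so the final assistant
// message carries the full reasoning trace for the next turn.
type Chunk = {reasoning: string; type: 'thinking'} | {text: string; type: 'text'}

async function accumulate(stream: AsyncIterable<Chunk>) {
  let content = ''
  let reasoning = ''
  for await (const chunk of stream) {
    if (chunk.type === 'thinking') reasoning += chunk.reasoning
    else content += chunk.text
  }
  return {
    content: content || null,
    ...(reasoning && {reasoning}),
    role: 'assistant' as const,
  }
}
```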
10 changes: 9 additions & 1 deletion src/agent/infra/llm/context/context-manager.ts
@@ -185,10 +185,18 @@
*
* @param content - Message content (text or null if only tool calls)
* @param toolCalls - Optional tool calls made by the assistant
* @param reasoning - Optional reasoning/thinking trace from the model.
* Required to round-trip for providers like DeepSeek-R1 that reject
* the next turn unless reasoning_content is replayed.
*/
public async addAssistantMessage(content: null | string, toolCalls?: InternalMessage['toolCalls']): Promise<void> {
public async addAssistantMessage(
content: null | string,
toolCalls?: InternalMessage['toolCalls'],
reasoning?: string,
): Promise<void> {
const message: InternalMessage = {
content,
...(reasoning && {reasoning}),
role: 'assistant',
toolCalls,
}
@@ -268,7 +276,7 @@
* @param _metadata.metadata - Execution metadata (duration, tokens, etc.)
* @returns The content that was added
*/
public async addToolResult(

⚠️ GitHub Actions / lint warning (line 279): Async method 'addToolResult' has too many parameters (5). Maximum allowed is 4
toolCallId: string,
toolName: string,
result: unknown,
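Call sites for the widened signature look like this; a sketch with placeholder values, using a structural type in place of the real ContextManager:

```ts
// The three call shapes addAssistantMessage now supports.
type AddAssistantMessage = (
  content: null | string,
  toolCalls?: unknown[],
  reasoning?: string,
) => Promise<void>

async function examples(add: AddAssistantMessage) {
  await add('plain text reply') // text only
  await add(null, [{id: 'call_1'}]) // tool calls only
  await add('answer', undefined, 'thinking trace') // reasoning round-trip
}
```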
src/agent/infra/llm/generators/ai-sdk-content-generator.ts
@@ -111,6 +111,7 @@ export class AiSdkContentGenerator implements IContentGenerator {
content: result.text,
finishReason: mapFinishReason(result.finishReason, toolCalls.length > 0),
rawResponse: result.response,
...(result.reasoningText && {reasoning: result.reasoningText}),
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
usage: {
completionTokens: result.usage.outputTokens ?? 0,
24 changes: 19 additions & 5 deletions src/agent/infra/llm/generators/ai-sdk-message-converter.ts
@@ -170,28 +170,42 @@ function convertUserMessage(msg: InternalMessage): ModelMessage | undefined {

/**
* Convert an internal assistant message to AI SDK format.
* Handles text content and tool calls.
* Handles reasoning, text content, and tool calls.
*
* The reasoning part is required when the message is replayed to providers
* that demand the previous turn's thinking trace round-trip back — DeepSeek-R1
* rejects requests with "The reasoning_content in the thinking mode must be
* passed back to the API" if the assistant message in history lacks the
* reasoning that was emitted on the prior turn.
*/
function convertAssistantMessage(msg: InternalMessage): ModelMessage | undefined {
const textContent = extractTextContent(msg)
const hasToolCalls = msg.toolCalls && msg.toolCalls.length > 0
const hasReasoning = Boolean(msg.reasoning)

if (!textContent && !hasToolCalls) {
if (!textContent && !hasToolCalls && !hasReasoning) {
return undefined
}

// Simple text-only case
if (textContent && !hasToolCalls) {
// Simple text-only case (no reasoning, no tools)
if (textContent && !hasToolCalls && !hasReasoning) {
return {content: textContent, role: 'assistant'}
}

// Build mixed content array (text + tool calls)
// Build mixed content array (reasoning + text + tool calls)
type AssistantPart =
| {input: unknown; providerOptions?: Record<string, Record<string, unknown>>; toolCallId: string; toolName: string; type: 'tool-call'}
| {text: string; type: 'reasoning'}
| {text: string; type: 'text'}

const parts: AssistantPart[] = []

// Reasoning must come first — providers that consume it expect it at the
// start of the assistant turn, before any text/tool-call output.
if (msg.reasoning) {
parts.push({text: msg.reasoning, type: 'reasoning'})
}

if (textContent) {
parts.push({text: textContent, type: 'text'})
}
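For a history message carrying all three parts, the converted ModelMessage looks like this (illustrative values; the reasoning-first ordering is what the code above enforces):

```ts
// Illustrative result of convertAssistantMessage for a message with
// reasoning, text, and one tool call.
const converted = {
  content: [
    {text: 'Need to read the file before answering.', type: 'reasoning'},
    {text: 'Checking README.md now.', type: 'text'},
    {input: {path: 'README.md'}, toolCallId: 'call_1', toolName: 'read_file', type: 'tool-call'},
  ],
  role: 'assistant',
}
```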
10 changes: 6 additions & 4 deletions src/agent/infra/llm/model-capabilities.ts
@@ -9,7 +9,8 @@
* - Grok: `reasoning_content` or `reasoning_details` fields
* - Gemini via OpenRouter: `reasoning_details` array or `thoughts` field
* - GLM (Zhipu AI): `reasoning_content` field in API response
* - Claude/DeepSeek/MiniMax: `<think>...</think>` XML tags in content
* - DeepSeek (R1/Reasoner): `reasoning_content` field in API response (OpenAI-compatible)
* - Claude/MiniMax: `<think>...</think>` XML tags in content
*/

/**
@@ -54,7 +55,7 @@
* // { reasoning: true, reasoningFormat: 'think-tags' }
* ```
*/
export function getModelCapabilities(modelId: string): ModelCapabilities {

⚠️ GitHub Actions / lint warning (line 58): Function 'getModelCapabilities' has a complexity of 28. Maximum allowed is 20
const id = modelId.toLowerCase()

// OpenAI reasoning models (o1, o3, gpt-5 series)
@@ -132,13 +133,14 @@
}
}

// DeepSeek models use think tags
// DeepSeek models — reasoning models stream `reasoning_content` natively
// (OpenAI-compatible field), not <think> tags.
if (id.includes('deepseek')) {
// DeepSeek-R1 and reasoning models
if (id.includes('r1') || id.includes('reasoner')) {
return {
reasoning: true,
reasoningFormat: 'think-tags',
reasoningField: 'reasoning_content',
reasoningFormat: 'native-field',
}
}

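A sketch of how a caller might branch on the two reasoning formats; `parseThinkTags` is a local helper written for this example, not an export of the module:

```ts
import {getModelCapabilities} from './model-capabilities.js' // path from src/agent/infra/llm

const caps = getModelCapabilities('deepseek-reasoner')
// => {reasoning: true, reasoningField: 'reasoning_content', reasoningFormat: 'native-field'}

// Local helper for the tag-based fallback.
function parseThinkTags(content: string): string {
  return /<think>([\s\S]*?)<\/think>/.exec(content)?.[1] ?? ''
}

function extractReasoning(raw: {content: string; reasoning_content?: string}): string {
  return caps.reasoningFormat === 'native-field'
    ? raw.reasoning_content ?? '' // DeepSeek-R1, GLM, Grok: native field
    : parseThinkTags(raw.content) // Claude/MiniMax: <think>...</think> tags
}
```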
40 changes: 40 additions & 0 deletions src/agent/infra/llm/providers/deepseek.ts
@@ -0,0 +1,40 @@
/**
* DeepSeek Provider Module
*
* Access to DeepSeek V3 (deepseek-chat) and R1 (deepseek-reasoner) via their
* OpenAI-compatible API. The reasoner model streams thinking through the
* native `reasoning_content` field rather than `<think>` tags — see
* model-capabilities.ts for the parser routing.
*/

import {createOpenAICompatible} from '@ai-sdk/openai-compatible'

import type {GeneratorFactoryConfig, ProviderModule} from './types.js'

import {AiSdkContentGenerator} from '../generators/ai-sdk-content-generator.js'

export const deepseekProvider: ProviderModule = {
apiKeyUrl: 'https://platform.deepseek.com/api_keys',
authType: 'api-key',
baseUrl: 'https://api.deepseek.com/v1',
category: 'other',
createGenerator(config: GeneratorFactoryConfig) {
const provider = createOpenAICompatible({
apiKey: config.apiKey!,
baseURL: 'https://api.deepseek.com/v1',
name: 'deepseek',
})

return new AiSdkContentGenerator({
model: provider.chatModel(config.model),
})
},
defaultModel: 'deepseek-chat',
description: 'DeepSeek V3 and R1 reasoning models',
envVars: ['DEEPSEEK_API_KEY'],
id: 'deepseek',
name: 'DeepSeek',
priority: 19,

providerType: 'openai',
}
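Wiring this up, as a hedged sketch; only `apiKey` and `model` are shown because those are the GeneratorFactoryConfig fields this module reads:

```ts
import {deepseekProvider} from './deepseek.js' // path from src/agent/infra/llm/providers

import type {GeneratorFactoryConfig} from './types.js'

// Build a generator for the R1 reasoner through this provider module.
const generator = deepseekProvider.createGenerator({
  apiKey: process.env.DEEPSEEK_API_KEY!,
  model: 'deepseek-reasoner', // the V3 chat model is 'deepseek-chat'
} as GeneratorFactoryConfig)
```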
39 changes: 39 additions & 0 deletions src/agent/infra/llm/providers/glm-coding-plan.ts
@@ -0,0 +1,39 @@
/**
* GLM Coding Plan (Z.AI) Provider Module
*
* Same Z.AI account as the standard `glm` provider but routes through the
* coding-plan endpoint so subscription quota is consumed instead of
* pay-per-token billing.
*/

import {createOpenAICompatible} from '@ai-sdk/openai-compatible'

import type {GeneratorFactoryConfig, ProviderModule} from './types.js'

import {AiSdkContentGenerator} from '../generators/ai-sdk-content-generator.js'

export const glmCodingPlanProvider: ProviderModule = {
apiKeyUrl: 'https://z.ai/manage-apikey/apikey-list',
authType: 'api-key',
baseUrl: 'https://api.z.ai/api/coding/paas/v4',
category: 'other',
createGenerator(config: GeneratorFactoryConfig) {
const provider = createOpenAICompatible({
apiKey: config.apiKey!,
baseURL: 'https://api.z.ai/api/coding/paas/v4',
name: 'glm-coding-plan',
})

return new AiSdkContentGenerator({
model: provider.chatModel(config.model),
})
},
defaultModel: 'glm-4.7',
description: 'GLM models on the Z.AI Coding Plan subscription',
envVars: ['ZHIPU_API_KEY'],
id: 'glm-coding-plan',
name: 'GLM Coding Plan (Z.AI)',
priority: 17.5,

providerType: 'openai',
}
4 changes: 4 additions & 0 deletions src/agent/infra/llm/providers/index.ts
@@ -14,6 +14,8 @@ import {byteroverProvider} from './byterover.js'
import {cerebrasProvider} from './cerebras.js'
import {cohereProvider} from './cohere.js'
import {deepinfraProvider} from './deepinfra.js'
import {deepseekProvider} from './deepseek.js'
import {glmCodingPlanProvider} from './glm-coding-plan.js'
import {glmProvider} from './glm.js'
import {googleProvider} from './google.js'
import {groqProvider} from './groq.js'
@@ -38,7 +40,9 @@ const PROVIDER_MODULES: Readonly<Record<string, ProviderModule>> = {
cerebras: cerebrasProvider,
cohere: cohereProvider,
deepinfra: deepinfraProvider,
deepseek: deepseekProvider,
glm: glmProvider,
'glm-coding-plan': glmCodingPlanProvider,
google: googleProvider,
groq: groqProvider,
minimax: minimaxProvider,
26 changes: 26 additions & 0 deletions src/server/core/domain/entities/provider-registry.ts
@@ -144,6 +144,19 @@ export const PROVIDER_REGISTRY: Readonly<Record<string, ProviderDefinition>> = {
name: 'DeepInfra',
priority: 10,
},
deepseek: {
apiKeyUrl: 'https://platform.deepseek.com/api_keys',
baseUrl: 'https://api.deepseek.com/v1',
category: 'other',
defaultModel: 'deepseek-chat',
description: 'DeepSeek V3 and R1 reasoning models',
envVars: ['DEEPSEEK_API_KEY'],
headers: {},
id: 'deepseek',
modelsEndpoint: '/models',
name: 'DeepSeek',
priority: 19,
},
glm: {
apiKeyUrl: 'https://open.z.ai',
baseUrl: 'https://api.z.ai/api/paas/v4',
@@ -157,6 +170,19 @@ export const PROVIDER_REGISTRY: Readonly<Record<string, ProviderDefinition>> = {
name: 'GLM (Z.AI)',
priority: 17,
},
'glm-coding-plan': {
apiKeyUrl: 'https://z.ai/manage-apikey/apikey-list',
baseUrl: 'https://api.z.ai/api/coding/paas/v4',
category: 'other',
defaultModel: 'glm-4.7',
description: 'GLM models on the Z.AI Coding Plan subscription',
envVars: ['ZHIPU_API_KEY'],
headers: {},
id: 'glm-coding-plan',
modelsEndpoint: '',
name: 'GLM Coding Plan (Z.AI)',
priority: 17.5,
},
google: {
apiKeyUrl: 'https://aistudio.google.com/apikey',
baseUrl: '',
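One detail worth calling out in the two new registry entries, sketched below: the fractional priority keeps `glm-coding-plan` adjacent to `glm` (17) in an ascending-priority sort, and its empty `modelsEndpoint` signals that model listing cannot use a `/models` route, falling back to the chat-based probe registered in the next file.

```ts
import {PROVIDER_REGISTRY} from './provider-registry.js' // path from entities/

// Relative ordering of the new entries under an ascending-priority sort.
const ordered = Object.values(PROVIDER_REGISTRY)
  .filter((p) => p.id.startsWith('glm') || p.id === 'deepseek')
  .sort((a, b) => a.priority - b.priority)
  .map((p) => `${p.id}@${p.priority}`)
// => ['glm@17', 'glm-coding-plan@17.5', 'deepseek@19']
```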
11 changes: 11 additions & 0 deletions src/server/infra/http/provider-model-fetcher-registry.ts
@@ -63,6 +63,7 @@ export async function getModelFetcher(providerId: string): Promise<IProviderMode
case 'cerebras': // falls through
case 'cohere': // falls through
case 'deepinfra': // falls through
case 'deepseek': // falls through
case 'groq': // falls through
case 'mistral': // falls through
case 'togetherai': // falls through
@@ -85,6 +86,16 @@
break
}

case 'glm-coding-plan': {
fetcher = new ChatBasedModelFetcher(
'https://api.z.ai/api/coding/paas/v4',
'GLM Coding Plan (Z.AI)',
['glm-4.7', 'glm-4.7-flash', 'glm-4.7-flashx', 'glm-5-turbo', 'glm-4.5', 'glm-4.5-flash'],
)

break
}

case 'google': {
fetcher = new GoogleModelFetcher()
