6 changes: 4 additions & 2 deletions README.md
@@ -34,7 +34,7 @@ Or download our self-hosted PDF version of the paper [here](https://byterover.de
- 🖥️ Interactive TUI with REPL interface (React/Ink)
- 🧠 Context tree and knowledge storage management
- 🔀 Git-like version control for the context tree (branch, commit, merge, push/pull)
- 🤖 18 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, and more)
- 🤖 20 LLM providers (Anthropic, OpenAI, Google, Groq, Mistral, xAI, DeepSeek, and more)
- 🛠️ 24 built-in agent tools (code exec, file ops, knowledge search, memory management)
- 🔄 Cloud sync with push/pull
- 👀 Review workflow for curate operations (approve/reject pending changes)
@@ -220,7 +220,7 @@ Run `brv --help` for the full command reference.
<details>
<summary><h2>Supported LLM Providers</h2></summary>

ByteRover CLI supports 18 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.
ByteRover CLI supports 20 LLM providers out of the box. Connect and switch providers from the dashboard, or use `brv providers connect` / `brv providers switch`.

| Provider | Description |
|----------|-------------|
@@ -233,13 +233,15 @@ ByteRover CLI supports 18 LLM providers out of the box. Connect and switch provi
| Cerebras | Fast inference |
| Cohere | Command models |
| DeepInfra | Open-source model hosting |
| DeepSeek | DeepSeek V3 and R1 reasoning models |
| OpenRouter | Multi-provider gateway |
| Perplexity | Search-augmented models |
| TogetherAI | Open-source model hosting |
| Vercel | AI SDK provider |
| Minimax | Minimax models |
| Moonshot | Kimi models |
| GLM | GLM models |
| GLM Coding Plan | GLM models on Z.AI Coding Plan subscription |
| OpenAI-Compatible | Any OpenAI-compatible API |
| ByteRover | ByteRover's hosted models |

8 changes: 8 additions & 0 deletions src/agent/core/interfaces/i-content-generator.ts
@@ -70,6 +70,14 @@ export interface GenerateContentResponse {
finishReason: 'error' | 'max_tokens' | 'stop' | 'tool_calls'
/** Raw response from provider (for debugging) */
rawResponse?: unknown
/**
* Reasoning / thinking text emitted by the model (e.g. DeepSeek-R1's
* `reasoning_content`, OpenAI o1's reasoning summary). Required to be
* passed back to the API on the next turn for some providers — DeepSeek-R1
* rejects the next call with "The reasoning_content in the thinking mode
* must be passed back to the API" if absent.
*/
reasoning?: string
/** Tool calls requested by the model */
toolCalls?: ToolCall[]
/** Token usage statistics */
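A minimal sketch of the contract this field creates, with simplified stand-in types rather than the real interfaces from this file:

```ts
// Why `reasoning` must survive into the next request's history.
// `AssistantTurn` is a simplified stand-in for InternalMessage.
interface AssistantTurn {
  content: null | string
  reasoning?: string
  role: 'assistant'
}

function toHistory(response: {content: null | string; reasoning?: string}): AssistantTurn {
  return {
    content: response.content,
    // Dropping this spread reproduces the DeepSeek-R1 failure quoted above:
    // "The reasoning_content in the thinking mode must be passed back to the API"
    ...(response.reasoning && {reasoning: response.reasoning}),
    role: 'assistant',
  }
}
```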
24 changes: 18 additions & 6 deletions src/agent/infra/llm/agent-llm-service.ts
@@ -210,7 +210,7 @@
*
* @param sessionId - Unique identifier for this session
* @param generator - Content generator for LLM calls (with decorators pre-applied)
* @param config - LLM service configuration (model, tokens, temperature)

⚠️ GitHub Actions / lint warning (line 213): Missing @param "options.compressionStrategies"
* @param options - Service dependencies
* @param options.toolManager - Tool manager for executing agent tools
* @param options.systemPromptManager - System prompt manager for building system prompts
@@ -571,9 +571,13 @@
try {
const response = await this.generator.generateContent(request)

// Convert response to InternalMessage format
// Convert response to InternalMessage format. The reasoning field must
// round-trip on the next turn for some providers (e.g. DeepSeek-R1
// rejects with "reasoning_content must be passed back to the API"
// otherwise).
const message: InternalMessage = {
content: response.content,
...(response.reasoning && {reasoning: response.reasoning}),
role: 'assistant',
toolCalls: response.toolCalls,
}
@@ -616,12 +620,16 @@
): Promise<InternalMessage> {
try {
let accumulatedContent = ''
let accumulatedReasoning = ''
let accumulatedToolCalls: ToolCall[] = []

// Stream chunks and accumulate content
for await (const chunk of this.generator.generateContentStream(request)) {
// Emit thinking/reasoning chunks as events for TUI display
// Emit thinking/reasoning chunks as events for TUI display, and accumulate
// them for the InternalMessage so the reasoning round-trips on the next
// turn (DeepSeek-R1 requires reasoning_content to be passed back).
if (chunk.type === StreamChunkType.THINKING && chunk.reasoning) {
accumulatedReasoning += chunk.reasoning
this.sessionEventBus.emit('llmservice:chunk', {
content: chunk.reasoning,
isComplete: chunk.isComplete,
@@ -652,6 +660,7 @@
// Convert accumulated response to InternalMessage format
const message: InternalMessage = {
content: accumulatedContent || null,
...(accumulatedReasoning && {reasoning: accumulatedReasoning}),
role: 'assistant',
toolCalls: accumulatedToolCalls.length > 0 ? accumulatedToolCalls : undefined,
}
@@ -1281,8 +1290,10 @@
taskId: taskId || undefined,
})

// Add assistant message to context
await this.contextManager.addAssistantMessage(content)
// Add assistant message to context. Pass reasoning so it round-trips to
// providers that demand it (DeepSeek-R1 rejects with "reasoning_content
// must be passed back to the API" otherwise).
await this.contextManager.addAssistantMessage(content, undefined, lastMessage.reasoning)

return content
}
@@ -1427,9 +1438,10 @@
// Emit thought events if present
this.handleThoughts(lastMessage, taskId)

// Has tool calls - add assistant message with tool calls
// Has tool calls - add assistant message with tool calls. Pass reasoning
// so it round-trips to providers that demand it.
const assistantContent = this.extractTextContent(lastMessage)
await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls)
await this.contextManager.addAssistantMessage(assistantContent, lastMessage.toolCalls, lastMessage.reasoning)

// Step 1: Create pending tool parts for all tool calls
for (const toolCall of lastMessage.toolCalls) {
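The streaming path above condenses to this accumulation pattern; the chunk shape here is a simplified sketch, not the real stream chunk type:

```ts
// Accumulate THINKING chunks alongside text so the final assistant
// message carries the full reasoning trace for the next turn.
type Chunk = {reasoning: string; type: 'thinking'} | {text: string; type: 'text'}

async function accumulate(stream: AsyncIterable<Chunk>) {
  let content = ''
  let reasoning = ''
  for await (const chunk of stream) {
    if (chunk.type === 'thinking') reasoning += chunk.reasoning
    else content += chunk.text
  }
  return {
    content: content || null,
    ...(reasoning && {reasoning}),
    role: 'assistant' as const,
  }
}
```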
10 changes: 9 additions & 1 deletion src/agent/infra/llm/context/context-manager.ts
@@ -185,10 +185,18 @@
*
* @param content - Message content (text or null if only tool calls)
* @param toolCalls - Optional tool calls made by the assistant
* @param reasoning - Optional reasoning/thinking trace from the model.
* Required to round-trip for providers like DeepSeek-R1 that reject
* the next turn unless reasoning_content is replayed.
*/
public async addAssistantMessage(content: null | string, toolCalls?: InternalMessage['toolCalls']): Promise<void> {
public async addAssistantMessage(
content: null | string,
toolCalls?: InternalMessage['toolCalls'],
reasoning?: string,
): Promise<void> {
const message: InternalMessage = {
content,
...(reasoning && {reasoning}),
role: 'assistant',
toolCalls,
}
@@ -268,7 +276,7 @@
* @param _metadata.metadata - Execution metadata (duration, tokens, etc.)
* @returns The content that was added
*/
public async addToolResult(

⚠️ GitHub Actions / lint warning (line 279): Async method 'addToolResult' has too many parameters (5). Maximum allowed is 4
toolCallId: string,
toolName: string,
result: unknown,
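Call sites for the widened signature look like this; a sketch with placeholder values, using a structural type in place of the real ContextManager:

```ts
// The three call shapes addAssistantMessage now supports.
type AddAssistantMessage = (
  content: null | string,
  toolCalls?: unknown[],
  reasoning?: string,
) => Promise<void>

async function examples(add: AddAssistantMessage) {
  await add('plain text reply') // text only
  await add(null, [{id: 'call_1'}]) // tool calls only
  await add('answer', undefined, 'thinking trace') // reasoning round-trip
}
```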
src/agent/infra/llm/generators/ai-sdk-content-generator.ts
@@ -111,6 +111,7 @@ export class AiSdkContentGenerator implements IContentGenerator {
content: result.text,
finishReason: mapFinishReason(result.finishReason, toolCalls.length > 0),
rawResponse: result.response,
...(result.reasoningText && {reasoning: result.reasoningText}),
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
usage: {
completionTokens: result.usage.outputTokens ?? 0,
24 changes: 19 additions & 5 deletions src/agent/infra/llm/generators/ai-sdk-message-converter.ts
@@ -170,28 +170,42 @@ function convertUserMessage(msg: InternalMessage): ModelMessage | undefined {

/**
* Convert an internal assistant message to AI SDK format.
* Handles text content and tool calls.
* Handles reasoning, text content, and tool calls.
*
* The reasoning part is required when the message is replayed to providers
* that demand the previous turn's thinking trace round-trip back — DeepSeek-R1
* rejects requests with "The reasoning_content in the thinking mode must be
* passed back to the API" if the assistant message in history lacks the
* reasoning that was emitted on the prior turn.
*/
function convertAssistantMessage(msg: InternalMessage): ModelMessage | undefined {
const textContent = extractTextContent(msg)
const hasToolCalls = msg.toolCalls && msg.toolCalls.length > 0
const hasReasoning = Boolean(msg.reasoning)

if (!textContent && !hasToolCalls) {
if (!textContent && !hasToolCalls && !hasReasoning) {
return undefined
}

// Simple text-only case
if (textContent && !hasToolCalls) {
// Simple text-only case (no reasoning, no tools)
if (textContent && !hasToolCalls && !hasReasoning) {
return {content: textContent, role: 'assistant'}
}

// Build mixed content array (text + tool calls)
// Build mixed content array (reasoning + text + tool calls)
type AssistantPart =
| {input: unknown; providerOptions?: Record<string, Record<string, unknown>>; toolCallId: string; toolName: string; type: 'tool-call'}
| {text: string; type: 'reasoning'}
| {text: string; type: 'text'}

const parts: AssistantPart[] = []

// Reasoning must come first — providers that consume it expect it at the
// start of the assistant turn, before any text/tool-call output.
if (msg.reasoning) {
parts.push({text: msg.reasoning, type: 'reasoning'})
}

if (textContent) {
parts.push({text: textContent, type: 'text'})
}
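For a history message carrying all three parts, the converted ModelMessage looks like this (illustrative values; the reasoning-first ordering is what the code above enforces):

```ts
// Illustrative result of convertAssistantMessage for a message with
// reasoning, text, and one tool call.
const converted = {
  content: [
    {text: 'Need to read the file before answering.', type: 'reasoning'},
    {text: 'Checking README.md now.', type: 'text'},
    {input: {path: 'README.md'}, toolCallId: 'call_1', toolName: 'read_file', type: 'tool-call'},
  ],
  role: 'assistant',
}
```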
10 changes: 6 additions & 4 deletions src/agent/infra/llm/model-capabilities.ts
@@ -9,7 +9,8 @@
* - Grok: `reasoning_content` or `reasoning_details` fields
* - Gemini via OpenRouter: `reasoning_details` array or `thoughts` field
* - GLM (Zhipu AI): `reasoning_content` field in API response
* - Claude/DeepSeek/MiniMax: `<think>...</think>` XML tags in content
* - DeepSeek (R1/Reasoner): `reasoning_content` field in API response (OpenAI-compatible)
* - Claude/MiniMax: `<think>...</think>` XML tags in content
*/

/**
@@ -54,7 +55,7 @@
* // { reasoning: true, reasoningFormat: 'think-tags' }
* ```
*/
export function getModelCapabilities(modelId: string): ModelCapabilities {

⚠️ GitHub Actions / lint warning (line 58): Function 'getModelCapabilities' has a complexity of 28. Maximum allowed is 20
const id = modelId.toLowerCase()

// OpenAI reasoning models (o1, o3, gpt-5 series)
@@ -132,13 +133,14 @@
}
}

// DeepSeek models use think tags
// DeepSeek models — reasoning models stream `reasoning_content` natively
// (OpenAI-compatible field), not <think> tags.
if (id.includes('deepseek')) {
// DeepSeek-R1 and reasoning models
if (id.includes('r1') || id.includes('reasoner')) {
return {
reasoning: true,
reasoningFormat: 'think-tags',
reasoningField: 'reasoning_content',
reasoningFormat: 'native-field',
}
}

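A sketch of how a caller might branch on the two reasoning formats; `parseThinkTags` is a local helper written for this example, not an export of the module:

```ts
import {getModelCapabilities} from './model-capabilities.js' // path from src/agent/infra/llm

const caps = getModelCapabilities('deepseek-reasoner')
// => {reasoning: true, reasoningField: 'reasoning_content', reasoningFormat: 'native-field'}

// Local helper for the tag-based fallback.
function parseThinkTags(content: string): string {
  return /<think>([\s\S]*?)<\/think>/.exec(content)?.[1] ?? ''
}

function extractReasoning(raw: {content: string; reasoning_content?: string}): string {
  return caps.reasoningFormat === 'native-field'
    ? raw.reasoning_content ?? '' // DeepSeek-R1, GLM, Grok: native field
    : parseThinkTags(raw.content) // Claude/MiniMax: <think>...</think> tags
}
```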
40 changes: 40 additions & 0 deletions src/agent/infra/llm/providers/deepseek.ts
@@ -0,0 +1,40 @@
/**
* DeepSeek Provider Module
*
* Access to DeepSeek V3 (deepseek-chat) and R1 (deepseek-reasoner) via their
* OpenAI-compatible API. The reasoner model streams thinking through the
* native `reasoning_content` field rather than `<think>` tags — see
* model-capabilities.ts for the parser routing.
*/

import {createOpenAICompatible} from '@ai-sdk/openai-compatible'

import type {GeneratorFactoryConfig, ProviderModule} from './types.js'

import {AiSdkContentGenerator} from '../generators/ai-sdk-content-generator.js'

export const deepseekProvider: ProviderModule = {
apiKeyUrl: 'https://platform.deepseek.com/api_keys',
authType: 'api-key',
baseUrl: 'https://api.deepseek.com/v1',
category: 'other',
createGenerator(config: GeneratorFactoryConfig) {
const provider = createOpenAICompatible({
apiKey: config.apiKey!,
baseURL: 'https://api.deepseek.com/v1',
name: 'deepseek',
})

return new AiSdkContentGenerator({
model: provider.chatModel(config.model),
})
},
defaultModel: 'deepseek-chat',
description: 'DeepSeek V3 and R1 reasoning models',
envVars: ['DEEPSEEK_API_KEY'],
id: 'deepseek',
name: 'DeepSeek',
priority: 19,

providerType: 'openai',
}
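Wiring this up, as a hedged sketch; only `apiKey` and `model` are shown because those are the GeneratorFactoryConfig fields this module reads:

```ts
import {deepseekProvider} from './deepseek.js' // path from src/agent/infra/llm/providers

import type {GeneratorFactoryConfig} from './types.js'

// Build a generator for the R1 reasoner through this provider module.
const generator = deepseekProvider.createGenerator({
  apiKey: process.env.DEEPSEEK_API_KEY!,
  model: 'deepseek-reasoner', // the V3 chat model is 'deepseek-chat'
} as GeneratorFactoryConfig)
```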
39 changes: 39 additions & 0 deletions src/agent/infra/llm/providers/glm-coding-plan.ts
@@ -0,0 +1,39 @@
/**
* GLM Coding Plan (Z.AI) Provider Module
*
* Same Z.AI account as the standard `glm` provider but routes through the
* coding-plan endpoint so subscription quota is consumed instead of
* pay-per-token billing.
*/

import {createOpenAICompatible} from '@ai-sdk/openai-compatible'

import type {GeneratorFactoryConfig, ProviderModule} from './types.js'

import {AiSdkContentGenerator} from '../generators/ai-sdk-content-generator.js'

export const glmCodingPlanProvider: ProviderModule = {
apiKeyUrl: 'https://z.ai/manage-apikey/apikey-list',
authType: 'api-key',
baseUrl: 'https://api.z.ai/api/coding/paas/v4',
category: 'other',
createGenerator(config: GeneratorFactoryConfig) {
const provider = createOpenAICompatible({
apiKey: config.apiKey!,
baseURL: 'https://api.z.ai/api/coding/paas/v4',
name: 'glm-coding-plan',
})

return new AiSdkContentGenerator({
model: provider.chatModel(config.model),
})
},
defaultModel: 'glm-4.7',
description: 'GLM models on the Z.AI Coding Plan subscription',
envVars: ['ZHIPU_API_KEY'],
id: 'glm-coding-plan',
name: 'GLM Coding Plan (Z.AI)',
priority: 17.5,

providerType: 'openai',
}
4 changes: 4 additions & 0 deletions src/agent/infra/llm/providers/index.ts
@@ -14,6 +14,8 @@ import {byteroverProvider} from './byterover.js'
import {cerebrasProvider} from './cerebras.js'
import {cohereProvider} from './cohere.js'
import {deepinfraProvider} from './deepinfra.js'
import {deepseekProvider} from './deepseek.js'
import {glmCodingPlanProvider} from './glm-coding-plan.js'
import {glmProvider} from './glm.js'
import {googleProvider} from './google.js'
import {groqProvider} from './groq.js'
@@ -38,7 +40,9 @@ const PROVIDER_MODULES: Readonly<Record<string, ProviderModule>> = {
cerebras: cerebrasProvider,
cohere: cohereProvider,
deepinfra: deepinfraProvider,
deepseek: deepseekProvider,
glm: glmProvider,
'glm-coding-plan': glmCodingPlanProvider,
google: googleProvider,
groq: groqProvider,
minimax: minimaxProvider,
26 changes: 26 additions & 0 deletions src/server/core/domain/entities/provider-registry.ts
@@ -144,6 +144,19 @@ export const PROVIDER_REGISTRY: Readonly<Record<string, ProviderDefinition>> = {
name: 'DeepInfra',
priority: 10,
},
deepseek: {
apiKeyUrl: 'https://platform.deepseek.com/api_keys',
baseUrl: 'https://api.deepseek.com/v1',
category: 'other',
defaultModel: 'deepseek-chat',
description: 'DeepSeek V3 and R1 reasoning models',
envVars: ['DEEPSEEK_API_KEY'],
headers: {},
id: 'deepseek',
modelsEndpoint: '/models',
name: 'DeepSeek',
priority: 19,
},
glm: {
apiKeyUrl: 'https://open.z.ai',
baseUrl: 'https://api.z.ai/api/paas/v4',
@@ -157,6 +170,19 @@ export const PROVIDER_REGISTRY: Readonly<Record<string, ProviderDefinition>> = {
name: 'GLM (Z.AI)',
priority: 17,
},
'glm-coding-plan': {
apiKeyUrl: 'https://z.ai/manage-apikey/apikey-list',
baseUrl: 'https://api.z.ai/api/coding/paas/v4',
category: 'other',
defaultModel: 'glm-4.7',
description: 'GLM models on the Z.AI Coding Plan subscription',
envVars: ['ZHIPU_API_KEY'],
headers: {},
id: 'glm-coding-plan',
modelsEndpoint: '',
name: 'GLM Coding Plan (Z.AI)',
priority: 17.5,
},
google: {
apiKeyUrl: 'https://aistudio.google.com/apikey',
baseUrl: '',
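One detail worth calling out in the two new registry entries, sketched below: the fractional priority keeps `glm-coding-plan` adjacent to `glm` (17) in an ascending-priority sort, and its empty `modelsEndpoint` signals that model listing cannot use a `/models` route, falling back to the chat-based probe registered in the next file.

```ts
import {PROVIDER_REGISTRY} from './provider-registry.js' // path from entities/

// Relative ordering of the new entries under an ascending-priority sort.
const ordered = Object.values(PROVIDER_REGISTRY)
  .filter((p) => p.id.startsWith('glm') || p.id === 'deepseek')
  .sort((a, b) => a.priority - b.priority)
  .map((p) => `${p.id}@${p.priority}`)
// => ['glm@17', 'glm-coding-plan@17.5', 'deepseek@19']
```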
11 changes: 11 additions & 0 deletions src/server/infra/http/provider-model-fetcher-registry.ts
@@ -63,6 +63,7 @@ export async function getModelFetcher(providerId: string): Promise<IProviderMode
case 'cerebras': // falls through
case 'cohere': // falls through
case 'deepinfra': // falls through
case 'deepseek': // falls through
case 'groq': // falls through
case 'mistral': // falls through
case 'togetherai': // falls through
@@ -85,6 +86,16 @@
break
}

case 'glm-coding-plan': {
fetcher = new ChatBasedModelFetcher(
'https://api.z.ai/api/coding/paas/v4',
'GLM Coding Plan (Z.AI)',
['glm-4.7', 'glm-4.7-flash', 'glm-4.7-flashx', 'glm-5-turbo', 'glm-4.5', 'glm-4.5-flash'],
)

break
}

case 'google': {
fetcher = new GoogleModelFetcher()
