From 409761dced9d1e341929f10be37ed6ed18b240b2 Mon Sep 17 00:00:00 2001 From: FallDownTheSystem <8807171+FallDownTheSystem@users.noreply.github.com> Date: Fri, 7 Mar 2025 15:20:38 +0200 Subject: [PATCH 1/3] Add support for reasoning in the UI Add UI settings to set reasoning effort / token budget Add UI to show reasoning tokens from Anthropic Claude 3.7 Sonnet and DeepSeek R1 --- core/index.d.ts | 24 +- core/llm/autodetect.ts | 39 +- core/llm/countTokens.ts | 29 +- core/llm/llms/Anthropic.ts | 367 +++++++++++++++--- core/llm/llms/FreeTrial.ts | 3 +- core/llm/llms/Gemini.ts | 3 +- core/llm/llms/WatsonX.ts | 9 +- core/llm/openaiTypeConverters.ts | 42 +- core/util/chatDescriber.ts | 1 + core/util/messageContent.ts | 9 +- docs/docs/json-reference.md | 18 + docs/docs/reference.md | 18 + docs/docs/yaml-reference.md | 16 + extensions/vscode/config_schema.json | 36 ++ gui/src/components/Layout.tsx | 3 + .../components/ModelSettingsInitializer.tsx | 15 + .../StepContainer/StepContainer.tsx | 8 +- gui/src/components/mainInput/InputToolbar.tsx | 223 ++++++----- .../InputToolbar/PopoverNoMoveTransition.tsx | 26 ++ .../InputToolbar/ToggleThinkingButton.tsx | 301 ++++++++++++++ .../InputToolbar/ToggleToolsButton.tsx | 178 ++++----- .../InputToolbar/ToolDropdownItem.tsx | 2 +- .../components/modelSelection/ModelSelect.tsx | 8 +- gui/src/hooks/useModelThinkingSettings.ts | 53 +++ gui/src/pages/gui/Chat.tsx | 98 ++--- gui/src/redux/slices/sessionSlice.ts | 261 +++++++++++-- gui/src/redux/slices/uiSlice.ts | 37 ++ gui/src/redux/thunks/streamNormalInput.ts | 53 ++- 28 files changed, 1507 insertions(+), 373 deletions(-) create mode 100644 gui/src/components/ModelSettingsInitializer.tsx create mode 100644 gui/src/components/mainInput/InputToolbar/PopoverNoMoveTransition.tsx create mode 100644 gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx create mode 100644 gui/src/hooks/useModelThinkingSettings.ts diff --git a/core/index.d.ts b/core/index.d.ts index f9daec5039..0d470b7bd2 100644 --- a/core/index.d.ts +++ b/core/index.d.ts @@ -324,7 +324,22 @@ export type ImageMessagePart = { imageUrl: { url: string }; }; -export type MessagePart = TextMessagePart | ImageMessagePart; +export type ThinkingMessagePart = { + type: "thinking"; + thinking: string; + signature: string; +}; + +export type RedactedThinkingMessagePart = { + type: "redacted_thinking"; + data: string; +}; + +export type MessagePart = + | TextMessagePart + | ImageMessagePart + | ThinkingMessagePart + | RedactedThinkingMessagePart; export type MessageContent = string | MessagePart[]; @@ -360,6 +375,7 @@ export interface UserChatMessage { export interface AssistantChatMessage { role: "assistant"; content: MessageContent; + reasoning_content?: string; toolCalls?: ToolCallDelta[]; } @@ -921,11 +937,17 @@ export interface BaseCompletionOptions { prediction?: Prediction; tools?: Tool[]; toolChoice?: ToolChoice; + thinking?: { + type: "enabled" | "disabled"; + budget_tokens?: number; + }; + reasoning_effort?: "high" | "medium" | "low"; } export interface ModelCapability { uploadImage?: boolean; tools?: boolean; + thinking?: boolean; } export interface ModelDescription { diff --git a/core/llm/autodetect.ts b/core/llm/autodetect.ts index 4fe864bfc5..252c7b5699 100644 --- a/core/llm/autodetect.ts +++ b/core/llm/autodetect.ts @@ -174,7 +174,8 @@ function autodetectTemplateType(model: string): TemplateType | undefined { lower.includes("pplx") || lower.includes("gemini") || lower.includes("grok") || - lower.includes("moonshot") + 
lower.includes("moonshot") || + lower.includes("deepseek-reasoner") ) { return undefined; } @@ -373,11 +374,45 @@ function autodetectPromptTemplates( return templates; } +const PROVIDER_SUPPORTS_THINKING: string[] = ["anthropic", "openai", "deepseek"]; + +const MODEL_SUPPORTS_THINKING: string[] = [ + "claude-3-7-sonnet-20250219", + "claude-3-7-sonnet-latest", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "deepseek-reasoner", +]; + +function modelSupportsThinking( + provider: string, + model: string, + title: string | undefined, + capabilities: ModelCapability | undefined, +): boolean { + if (capabilities?.thinking !== undefined) { + return capabilities.thinking; + } + + if (!PROVIDER_SUPPORTS_THINKING.includes(provider)) { + return false; + } + + const lower = model.toLowerCase(); + return MODEL_SUPPORTS_THINKING.some( + (modelName) => lower.includes(modelName) || title?.includes(modelName), + ); +} + export { autodetectPromptTemplates, autodetectTemplateFunction, autodetectTemplateType, llmCanGenerateInParallel, modelSupportsImages, - modelSupportsTools, + modelSupportsThinking, + modelSupportsTools }; + diff --git a/core/llm/countTokens.ts b/core/llm/countTokens.ts index dfbd2da5d7..3dd21a1b05 100644 --- a/core/llm/countTokens.ts +++ b/core/llm/countTokens.ts @@ -90,8 +90,15 @@ async function countTokensAsync( const promises = content.map(async (part) => { if (part.type === "imageUrl") { return countImageTokens(part); + } else if (part.type === "thinking") { + return (await encoding.encode(part.thinking ?? "")).length; + } else if (part.type === "redacted_thinking") { + // For redacted thinking, don't count any tokens + return 0; + } else if (part.type === "text") { + return (await encoding.encode(part.text ?? "")).length; } - return (await encoding.encode(part.text ?? "")).length; + return 0; }); return (await Promise.all(promises)).reduce((sum, val) => sum + val, 0); } @@ -106,12 +113,17 @@ function countTokens( const encoding = encodingForModel(modelName); if (Array.isArray(content)) { return content.reduce((acc, part) => { - return ( - acc + - (part.type === "text" - ? encoding.encode(part.text ?? "", "all", []).length - : countImageTokens(part)) - ); + if (part.type === "text") { + return acc + encoding.encode(part.text ?? "", "all", []).length; + } else if (part.type === "imageUrl") { + return acc + countImageTokens(part); + } else if (part.type === "thinking") { + return acc + encoding.encode(part.thinking ?? "", "all", []).length; + } else if (part.type === "redacted_thinking") { + // For redacted thinking, don't count any tokens + return acc; + } + return acc; }, 0); } else { return encoding.encode(content ?? 
"", "all", []).length; @@ -469,5 +481,6 @@ export { pruneLinesFromTop, pruneRawPromptFromTop, pruneStringFromBottom, - pruneStringFromTop, + pruneStringFromTop }; + diff --git a/core/llm/llms/Anthropic.ts b/core/llm/llms/Anthropic.ts index dcd13d38e4..38fb05a35a 100644 --- a/core/llm/llms/Anthropic.ts +++ b/core/llm/llms/Anthropic.ts @@ -1,4 +1,11 @@ -import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js"; +import { + ChatMessage, + CompletionOptions, + LLMOptions, + RedactedThinkingMessagePart, + TextMessagePart, + ThinkingMessagePart, +} from "../../index.js"; import { renderChatMessage, stripImages } from "../../util/messageContent.js"; import { BaseLLM } from "../index.js"; import { streamSse } from "../stream.js"; @@ -36,8 +43,16 @@ class Anthropic extends BaseLLM { name: options.toolChoice.function.name, } : undefined, + thinking: options.thinking, }; + if ( + finalOptions.thinking?.type === "disabled" && + "budget_tokens" in finalOptions.thinking + ) { + delete finalOptions.thinking.budget_tokens; + } + return finalOptions; } @@ -53,15 +68,54 @@ class Anthropic extends BaseLLM { }, ], }; - } else if (message.role === "assistant" && message.toolCalls) { - return { - role: "assistant", - content: message.toolCalls.map((toolCall) => ({ + } else if (message.role === "assistant") { + // Start with an empty content array + const content: any[] = []; + + // 1. Add thinking blocks first (if present in array content) + if (Array.isArray(message.content)) { + const thinkingBlocks = message.content.filter( + (part) => + part.type === "thinking" || part.type === "redacted_thinking", + ); + + if (thinkingBlocks.length > 0) { + content.push(...thinkingBlocks); + } + } + + // 2. Add tool calls (if present) + if (message.toolCalls?.length) { + const toolUseBlocks = message.toolCalls.map((toolCall) => ({ type: "tool_use", id: toolCall.id, name: toolCall.function?.name, input: JSON.parse(toolCall.function?.arguments || "{}"), - })), + })); + + content.push(...toolUseBlocks); + } + + // 3. Add text parts (if present) + if (Array.isArray(message.content)) { + const textBlocks = message.content.filter( + (part) => part.type === "text", + ); + if (textBlocks.length > 0) { + content.push(...textBlocks); + } + } else if (typeof message.content === "string" && message.content) { + content.push({ + type: "text", + text: message.content, + ...(addCaching ? { cache_control: { type: "ephemeral" } } : {}), + }); + } + + // Return full assistant message with combined content + return { + role: "assistant", + content: content.length > 0 ? content : "", // Handle empty content case }; } @@ -79,19 +133,34 @@ class Anthropic extends BaseLLM { return chatMessage; } - return { - role: message.role, - content: message.content.map((part, contentIdx) => { - if (part.type === "text") { - const newpart = { - ...part, - // If multiple text parts, only add cache_control to the last one - ...(addCaching && contentIdx == message.content.length - 1 - ? { cache_control: { type: "ephemeral" } } - : {}), - }; - return newpart; - } + // Filter out empty thinking blocks before mapping + const filteredContent = Array.isArray(message.content) + ? message.content.filter( + (part) => + !( + part.type === "thinking" && + (!part.thinking || part.thinking.trim() === "") && + (!part.signature || part.signature.trim() === "") + ), + ) + : message.content; + + const convertedContent = ( + Array.isArray(filteredContent) ? 
filteredContent : [filteredContent] + ).map((part, contentIdx) => { + if (part.type === "text") { + const newpart = { + ...part, + // If multiple text parts, only add cache_control to the last one + ...(addCaching && + contentIdx == + (Array.isArray(filteredContent) ? filteredContent.length : 1) - 1 + ? { cache_control: { type: "ephemeral" } } + : {}), + }; + return newpart; + } + if (part.type === "imageUrl") { return { type: "image", source: { @@ -100,8 +169,32 @@ class Anthropic extends BaseLLM { data: part.imageUrl?.url.split(",")[1], }, }; - }), + } + if (part.type === "thinking") { + // Make sure to preserve the thinking and signature fields + return { + type: "thinking", + thinking: part.thinking, + signature: part.signature, + }; + } + if (part.type === "redacted_thinking") { + // Make sure to preserve the data field + return { + type: "redacted_thinking", + data: part.data, + }; + } + // Pass through other blocks as-is + return part; + }); + + const result = { + role: message.role, + content: convertedContent, }; + + return result; } public convertMessages(msgs: ChatMessage[]): any[] { @@ -158,30 +251,38 @@ class Anthropic extends BaseLLM { ); const msgs = this.convertMessages(messages); + + // Merge default headers with custom headers + const headers: any = { + "Content-Type": "application/json", + Accept: "application/json", + "anthropic-version": "2023-06-01", + "x-api-key": this.apiKey as string, + ...this.requestOptions?.headers, + }; + + // Handle the special case for anthropic-beta + this.setBetaHeaders(headers, shouldCacheSystemMessage); + + // Create the request body + const requestBody = { + ...this.convertArgs(options), + messages: msgs, + system: shouldCacheSystemMessage + ? [ + { + type: "text", + text: this.systemMessage, + cache_control: { type: "ephemeral" }, + }, + ] + : systemMessage, + }; + const response = await this.fetch(new URL("messages", this.apiBase), { method: "POST", - headers: { - "Content-Type": "application/json", - Accept: "application/json", - "anthropic-version": "2023-06-01", - "x-api-key": this.apiKey as string, - ...(shouldCacheSystemMessage || this.cacheBehavior?.cacheConversation - ? { "anthropic-beta": "prompt-caching-2024-07-31" } - : {}), - }, - body: JSON.stringify({ - ...this.convertArgs(options), - messages: msgs, - system: shouldCacheSystemMessage - ? 
[ - { - type: "text", - text: this.systemMessage, - cache_control: { type: "ephemeral" }, - }, - ] - : systemMessage, - }), + headers, + body: JSON.stringify(requestBody), signal, }); @@ -202,31 +303,166 @@ class Anthropic extends BaseLLM { if (options.stream === false) { const data = await response.json(); - yield { role: "assistant", content: data.content[0].text }; + + // Check if there are thinking blocks in the response + const thinkingBlocks = data.content.filter( + (block: any) => + block.type === "thinking" || block.type === "redacted_thinking", + ); + + const textBlocks = data.content.filter( + (block: any) => block.type === "text", + ); + + // First yield thinking blocks if they exist + for (const block of thinkingBlocks) { + if (block.type === "thinking") { + const thinkingPart: ThinkingMessagePart = { + type: "thinking", + thinking: block.thinking, + signature: block.signature, + }; + + // Yield thinking content + yield { + role: "assistant", + content: [thinkingPart], + }; + } else if (block.type === "redacted_thinking") { + const redactedPart: RedactedThinkingMessagePart = { + type: "redacted_thinking", + data: block.data, + }; + + // Yield redacted thinking content + yield { + role: "assistant", + content: [redactedPart], + }; + } + } + + // Then yield text blocks as a separate message + if (textBlocks.length > 0) { + for (const block of textBlocks) { + if (block.type === "text") { + const textPart: TextMessagePart = { + type: "text", + text: block.text, + }; + + // Yield text content + yield { + role: "assistant", + content: [textPart], + }; + } + } + } return; } + // State for tracking different content blocks let lastToolUseId: string | undefined; let lastToolUseName: string | undefined; + let thinkingBlockIndex: number | undefined; + let thinkingBlocksById: Map = new Map(); + let thinkingSignaturesById: Map = new Map(); + let textContent = ""; + for await (const value of streamSse(response)) { - // https://docs.anthropic.com/en/api/messages-streaming#event-types switch (value.type) { case "content_block_start": if (value.content_block.type === "tool_use") { lastToolUseId = value.content_block.id; lastToolUseName = value.content_block.name; + } else if (value.content_block.type === "thinking") { + thinkingBlockIndex = value.index; + thinkingBlocksById.set(value.index, ""); + } else if (value.content_block.type === "redacted_thinking") { + // Emit redacted thinking blocks immediately + yield { + role: "assistant", + content: [ + { + type: "redacted_thinking", + data: value.content_block.data, + }, + ], + }; } break; + case "content_block_delta": - // https://docs.anthropic.com/en/api/messages-streaming#delta-types switch (value.delta.type) { case "text_delta": - yield { role: "assistant", content: value.delta.text }; + textContent += value.delta.text; + + // Emit text content as it comes in + yield { + role: "assistant", + content: [ + { + type: "text", + text: value.delta.text, + }, + ], + }; + break; + + case "thinking_delta": + if (thinkingBlockIndex !== undefined) { + // Accumulate thinking content + const currentContent = + thinkingBlocksById.get(thinkingBlockIndex) || ""; + const newContent = currentContent + value.delta.thinking; + thinkingBlocksById.set(thinkingBlockIndex, newContent); + + // Emit thinking content as it comes in + yield { + role: "assistant", + content: [ + { + type: "thinking", + thinking: value.delta.thinking, + signature: "", // Empty signature for deltas + }, + ], + }; + } + break; + + case "signature_delta": + if (thinkingBlockIndex !== 
undefined) { + // Store the signature + thinkingSignaturesById.set( + thinkingBlockIndex, + value.delta.signature, + ); + + // Get the accumulated content + const thinkingContent = + thinkingBlocksById.get(thinkingBlockIndex) || ""; + + // Emit complete thinking block with signature + yield { + role: "assistant", + content: [ + { + type: "thinking", + thinking: thinkingContent, + signature: value.delta.signature, + }, + ], + }; + } break; + case "input_json_delta": if (!lastToolUseId || !lastToolUseName) { throw new Error("No tool use found"); } + // Emit tool call yield { role: "assistant", content: "", @@ -244,15 +480,46 @@ class Anthropic extends BaseLLM { break; } break; + case "content_block_stop": - lastToolUseId = undefined; - lastToolUseName = undefined; - break; - default: + if (value.index === thinkingBlockIndex) { + thinkingBlockIndex = undefined; + } + + if (value.content_block?.type === "tool_use") { + lastToolUseId = undefined; + lastToolUseName = undefined; + } break; } } } + + private setBetaHeaders( + headers: any, + shouldCacheSystemMessage: boolean | undefined, + ) { + const betaValues = new Set(); + + // Add from existing header if present + const existingBeta = headers["anthropic-beta"]; + if (existingBeta && typeof existingBeta === "string") { + existingBeta + .split(",") + .map((v) => v.trim()) + .forEach((v) => betaValues.add(v)); + } + + // Add caching header if we should + if (shouldCacheSystemMessage || this.cacheBehavior?.cacheConversation) { + betaValues.add("prompt-caching-2024-07-31"); + } + + // Update the header if we have values + if (betaValues.size > 0) { + headers["anthropic-beta"] = Array.from(betaValues).join(","); + } + } } export default Anthropic; diff --git a/core/llm/llms/FreeTrial.ts b/core/llm/llms/FreeTrial.ts index ab69aff705..e505297129 100644 --- a/core/llm/llms/FreeTrial.ts +++ b/core/llm/llms/FreeTrial.ts @@ -6,6 +6,7 @@ import { Chunk, CompletionOptions, LLMOptions, + TextMessagePart, } from "../../index.js"; import { BaseLLM } from "../index.js"; import { streamResponse } from "../stream.js"; @@ -124,7 +125,7 @@ class FreeTrial extends BaseLLM { } return { type: "text", - text: part.text, + text: (part as TextMessagePart).text, }; }); return { diff --git a/core/llm/llms/Gemini.ts b/core/llm/llms/Gemini.ts index ee24cc3e95..9683b14abe 100644 --- a/core/llm/llms/Gemini.ts +++ b/core/llm/llms/Gemini.ts @@ -2,6 +2,7 @@ import { AssistantChatMessage, ChatMessage, CompletionOptions, + ImageMessagePart, LLMOptions, MessagePart, ToolCallDelta, @@ -129,7 +130,7 @@ class Gemini extends BaseLLM { : { inlineData: { mimeType: "image/jpeg", - data: part.imageUrl?.url.split(",")[1], + data: (part as ImageMessagePart).imageUrl?.url.split(",")[1], }, }; } diff --git a/core/llm/llms/WatsonX.ts b/core/llm/llms/WatsonX.ts index 17c8b1d4b4..fde1ed15cc 100644 --- a/core/llm/llms/WatsonX.ts +++ b/core/llm/llms/WatsonX.ts @@ -1,4 +1,9 @@ -import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js"; +import { + ChatMessage, + CompletionOptions, + LLMOptions, + TextMessagePart, +} from "../../index.js"; import { renderChatMessage } from "../../util/messageContent.js"; import { BaseLLM } from "../index.js"; import { streamResponse } from "../stream.js"; @@ -103,7 +108,7 @@ class WatsonX extends BaseLLM { } return { type: "text", - text: part.text, + text: (part as TextMessagePart).text, }; }); return { diff --git a/core/llm/openaiTypeConverters.ts b/core/llm/openaiTypeConverters.ts index 8e0acef566..0df9588a6a 100644 --- 
a/core/llm/openaiTypeConverters.ts +++ b/core/llm/openaiTypeConverters.ts @@ -1,21 +1,32 @@ import { FimCreateParamsStreaming } from "@continuedev/openai-adapters/dist/apis/base"; import { - Chat, ChatCompletion, ChatCompletionAssistantMessageParam, ChatCompletionChunk, ChatCompletionCreateParams, ChatCompletionMessageParam, - ChatCompletionUserMessageParam, CompletionCreateParams, } from "openai/resources/index"; -import { - ChatMessage, - CompletionOptions, - MessageContent, - TextMessagePart, -} from ".."; +import { ChatMessage, CompletionOptions, TextMessagePart } from ".."; + +// Extend OpenAI API types to support DeepSeek reasoning_content field +interface DeepSeekDelta { + reasoning_content?: string; + content?: string; + role?: string; + tool_calls?: any[]; +} + +interface DeepSeekChatCompletionChunk + extends Omit { + choices?: Array<{ + delta: DeepSeekDelta; + index: number; + finish_reason: string | null; + logprobs?: object | null; + }>; +} export function toChatMessage( message: ChatMessage, @@ -83,7 +94,7 @@ export function toChatMessage( }, }; } - return part; + return part as TextMessagePart; }), }; } @@ -176,11 +187,18 @@ export function fromChatResponse(response: ChatCompletion): ChatMessage { } export function fromChatCompletionChunk( - chunk: ChatCompletionChunk, + chunk: ChatCompletionChunk | DeepSeekChatCompletionChunk, ): ChatMessage | undefined { - const delta = chunk.choices?.[0]?.delta; + const delta = chunk.choices?.[0]?.delta as DeepSeekDelta; - if (delta?.content) { + // Handle reasoning_content (for DeepSeek and compatible models) + if (delta?.reasoning_content) { + return { + role: "assistant", + content: "", + reasoning_content: delta.reasoning_content, + }; + } else if (delta?.content) { return { role: "assistant", content: delta.content, diff --git a/core/util/chatDescriber.ts b/core/util/chatDescriber.ts index a10edbe722..11a62ae63a 100644 --- a/core/util/chatDescriber.ts +++ b/core/util/chatDescriber.ts @@ -28,6 +28,7 @@ export class ChatDescriber { } completionOptions.maxTokens = ChatDescriber.maxTokens; + completionOptions.thinking = undefined; // Prompt the user's current LLM for the title const titleResponse = await model.chat( diff --git a/core/util/messageContent.ts b/core/util/messageContent.ts index 89462a1bf3..08a39b566b 100644 --- a/core/util/messageContent.ts +++ b/core/util/messageContent.ts @@ -17,12 +17,19 @@ export function stripImages(messageContent: MessageContent): string { .join("\n"); } +export function stripThinking(content: string): string { + if (typeof content === "string") { + return content.replace(/[\s\S]*?<\/think>/g, ""); + } + return content; +} + export function renderChatMessage(message: ChatMessage): string { switch (message.role) { case "user": case "assistant": case "system": - return stripImages(message.content); + return stripThinking(stripImages(message.content)); case "tool": return message.content; } diff --git a/docs/docs/json-reference.md b/docs/docs/json-reference.md index 11e92a183c..68ff9afade 100644 --- a/docs/docs/json-reference.md +++ b/docs/docs/json-reference.md @@ -37,6 +37,7 @@ Each model has specific configuration options tailored to its provider and funct - `capabilities`: Override auto-detected capabilities: - `uploadImage`: Boolean indicating if the model supports image uploads. - `tools`: Boolean indicating if the model supports tool calls. + - `thinking`: Boolean indicating if the model supports thinking capabilities. 
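When the `thinking` capability is not set explicitly, support is autodetected from the provider and model name. A minimal sketch of how the new `modelSupportsThinking` helper in `core/llm/autodetect.ts` resolves the flag (the argument values here are illustrative):

```typescript
import { modelSupportsThinking } from "core/llm/autodetect";

// An explicit capabilities.thinking value always wins; otherwise the provider
// must be anthropic, openai, or deepseek, and the lowercased model name (or
// the title) must contain a known reasoning model id such as "o3-mini",
// "deepseek-reasoner", or "claude-3-7-sonnet-latest".
const supported = modelSupportsThinking(
  "anthropic", // provider
  "claude-3-7-sonnet-latest", // model, matched case-insensitively
  undefined, // optional title, also matched against known ids
  undefined, // optional ModelCapability override
); // => true
```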
_(AWS Only)_ @@ -60,6 +61,19 @@ Example: "title": "GPT-4o", "provider": "openai", "apiKey": "" + }, + { + "title": "Claude 3.7 Sonnet", + "provider": "anthropic", + "model": "claude-3-7-sonnet-20250219", + "apiKey": "", + "completionOptions": { + "maxTokens": 8192, + "thinking": { + "type": "enabled", + "budget_tokens": 4096 + } + } } ] } @@ -157,6 +171,10 @@ Parameters that control the behavior of text generation and completion settings. - `numThreads`: The number of threads used during the generation process. Available only for Ollama as `num_thread`. - `keepAlive`: For Ollama, this parameter sets the number of seconds to keep the model loaded after the last request, unloading it from memory if inactive (default: `1800` seconds, or 30 minutes). - `useMmap`: For Ollama, this parameter allows the model to be mapped into memory. If disabled can enhance response time on low end devices but will slow down the stream. +- `thinking`: Enable extended thinking capabilities for models that support it (e.g., Claude 3.7 Sonnet). This is an object with the following properties: + - `type`: The type of thinking to enable ("enabled" or "disabled"). + - `budget_tokens`: The maximum number of tokens to allocate for thinking. Must be less than maxTokens and at least 1024. Only required if `type` is "enabled". +- `reasoning_effort`: The reasoning effort to use for the model. For OpenAI o1 and o3-mini. Supports "high", "medium", and "low". Example diff --git a/docs/docs/reference.md b/docs/docs/reference.md index 143fec98b0..3f65ae244a 100644 --- a/docs/docs/reference.md +++ b/docs/docs/reference.md @@ -37,6 +37,7 @@ Each model has specific configuration options tailored to its provider and funct - `capabilities`: Override auto-detected capabilities: - `uploadImage`: Boolean indicating if the model supports image uploads. - `tools`: Boolean indicating if the model supports tool use. + - `thinking`: Boolean indicating if the model supports extended thinking capabilities. _(AWS Only)_ @@ -60,6 +61,19 @@ Example: "title": "GPT-4o", "provider": "openai", "apiKey": "YOUR_API_KEY" + }, + { + "title": "Claude 3.7 Sonnet", + "provider": "anthropic", + "model": "claude-3-7-sonnet-20250219", + "apiKey": "YOUR_ANTHROPIC_API_KEY", + "completionOptions": { + "maxTokens": 8192, + "thinking": { + "type": "enabled", + "budget_tokens": 4096 + } + } } ] } @@ -158,6 +172,10 @@ Parameters that control the behavior of text generation and completion settings. - `numThreads`: The number of threads used during the generation process. Available only for Ollama as `num_thread`. - `keepAlive`: For Ollama, this parameter sets the number of seconds to keep the model loaded after the last request, unloading it from memory if inactive (default: `1800` seconds, or 30 minutes). - `useMmap`: For Ollama, this parameter allows the model to be mapped into memory. If disabled can enhance response time on low end devices but will slow down the stream. +- `thinking`: Enable extended thinking capabilities for models that support it (e.g., Claude 3.7 Sonnet). This is an object with the following properties: + - `type`: The type of thinking to enable (currently only "enabled" is supported). + - `budget_tokens`: The maximum number of tokens to allocate for thinking. Must be less than maxTokens and at least 1024. Only required if `type` is "enabled". +- `reasoning_effort`: The reasoning effort to use for the model. For OpenAI o1 and o3-mini. Supports "high", "medium", and "low". 
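For instance, a minimal sketch of an OpenAI model entry that sets `reasoning_effort` (the title and API key are placeholders):

```json
{
  "title": "o3-mini",
  "provider": "openai",
  "model": "o3-mini",
  "apiKey": "YOUR_API_KEY",
  "completionOptions": {
    "reasoning_effort": "high"
  }
}
```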
Example diff --git a/docs/docs/yaml-reference.md b/docs/docs/yaml-reference.md index 600b09d537..d179fd2ea9 100644 --- a/docs/docs/yaml-reference.md +++ b/docs/docs/yaml-reference.md @@ -155,6 +155,10 @@ The `models` section defines the language models used in your configuration. Mod - `topK`: Maximum number of tokens considered at each step. - `stop`: An array of stop tokens that will terminate the completion. - `n`: Number of completions to generate. + - `thinking`: Enable extended thinking capabilities for models that support it (e.g., Claude 3.7 Sonnet). This is an object with the following properties: + - `type`: The type of thinking to enable (currently only "enabled" is supported). + - `budget_tokens`: The maximum number of tokens to allocate for thinking. Must be less than maxTokens and at least 1024. Only required if `type` is "enabled". + - `reasoning_effort`: The reasoning effort to use for the model. For OpenAI o1 and o3-mini. Supports "high", "medium", and "low". - `requestOptions`: HTTP request options specific to the model. @@ -391,6 +395,18 @@ models: temperature: 0.3 stop: - "\n" + + - name: Claude 3.7 Sonnet + provider: anthropic + model: claude-3-7-sonnet-20250219 + roles: + - chat + - edit + defaultCompletionOptions: + maxTokens: 8192 + thinking: + type: enabled + budget_tokens: 4096 rules: - Give concise responses diff --git a/extensions/vscode/config_schema.json b/extensions/vscode/config_schema.json index bf83c12341..90c6776450 100644 --- a/extensions/vscode/config_schema.json +++ b/extensions/vscode/config_schema.json @@ -71,6 +71,38 @@ "title": "Ollama keep_alive", "description": "The number of seconds after no requests are made to unload the model from memory. Defaults to 60*30 = 30min", "type": "integer" + }, + "thinking": { + "title": "Thinking", + "description": "Enable extended thinking capabilities for models that support it (e.g., Claude 3.7 Sonnet). This allows the model to show its reasoning process.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["enabled", "disabled"], + "description": "Extended thinking enabled or disabled." + }, + "budget_tokens": { + "type": "integer", + "description": "The maximum number of tokens to allocate for thinking. Must be less than maxTokens.", + "minimum": 1024 + } + }, + "required": ["type"], + "if": { + "properties": { + "type": { "const": "enabled" } + } + }, + "then": { + "required": ["budget_tokens"] + } + }, + "reasoning_effort": { + "title": "Reasoning Effort", + "description": "The reasoning effort to use for the model. For OpenAI o1 and o3-mini. 
Supports 'high', 'medium', and 'low'.", + "type": "string", + "enum": ["high", "medium", "low"] } } }, @@ -452,6 +484,10 @@ "tools": { "type": "boolean", "description": "Indicates whether the model can use tools" + }, + "thinking": { + "type": "boolean", + "description": "Indicates whether the model supports extended thinking capabilities" } } } diff --git a/gui/src/components/Layout.tsx b/gui/src/components/Layout.tsx index a10fc7b6ee..92bd9bcc41 100644 --- a/gui/src/components/Layout.tsx +++ b/gui/src/components/Layout.tsx @@ -22,6 +22,7 @@ import { incrementFreeTrialCount } from "../util/freeTrial"; import { ROUTES } from "../util/navigation"; import TextDialog from "./dialogs"; import Footer from "./Footer"; +import ModelSettingsInitializer from "./ModelSettingsInitializer"; import { isNewUserOnboarding, useOnboardingCard } from "./OnboardingCard"; import OSRContextMenu from "./OSRContextMenu"; import PostHogPageView from "./PosthogPageView"; @@ -284,6 +285,8 @@ const Layout = () => { /> + {/* Initialize model-specific settings when model changes */} + diff --git a/gui/src/components/ModelSettingsInitializer.tsx b/gui/src/components/ModelSettingsInitializer.tsx new file mode 100644 index 0000000000..1ddbef90fd --- /dev/null +++ b/gui/src/components/ModelSettingsInitializer.tsx @@ -0,0 +1,15 @@ +import { useModelThinkingSettings } from "../hooks/useModelThinkingSettings"; + +/** + * This is a "headless" component that initializes model-specific settings. + * It doesn't render any UI but handles configuration initialization when models change. + */ +export function ModelSettingsInitializer() { + // Initialize thinking settings when model changes + useModelThinkingSettings(); + + // No visible UI - this is a utility component + return null; +} + +export default ModelSettingsInitializer; diff --git a/gui/src/components/StepContainer/StepContainer.tsx b/gui/src/components/StepContainer/StepContainer.tsx index b395db0763..8c26879ee1 100644 --- a/gui/src/components/StepContainer/StepContainer.tsx +++ b/gui/src/components/StepContainer/StepContainer.tsx @@ -1,5 +1,5 @@ import { ChatHistoryItem } from "core"; -import { renderChatMessage, stripImages } from "core/util/messageContent"; +import { renderChatMessage } from "core/util/messageContent"; import { useEffect, useState } from "react"; import { useDispatch } from "react-redux"; import styled from "styled-components"; @@ -9,9 +9,9 @@ import { selectUIConfig } from "../../redux/slices/configSlice"; import { deleteMessage } from "../../redux/slices/sessionSlice"; import { getFontSize } from "../../util"; import StyledMarkdownPreview from "../markdown/StyledMarkdownPreview"; +import Reasoning from "./Reasoning"; import ResponseActions from "./ResponseActions"; import ThinkingIndicator from "./ThinkingIndicator"; -import Reasoning from "./Reasoning"; interface StepContainerProps { item: ChatHistoryItem; @@ -92,11 +92,11 @@ export default function StepContainer(props: StepContainerProps) { ) : ( <> - + diff --git a/gui/src/components/mainInput/InputToolbar.tsx b/gui/src/components/mainInput/InputToolbar.tsx index ace2ae9910..be5d9f83d0 100644 --- a/gui/src/components/mainInput/InputToolbar.tsx +++ b/gui/src/components/mainInput/InputToolbar.tsx @@ -1,6 +1,10 @@ import { AtSymbolIcon, PhotoIcon } from "@heroicons/react/24/outline"; import { InputModifiers } from "core"; -import { modelSupportsImages, modelSupportsTools } from "core/llm/autodetect"; +import { + modelSupportsImages, + modelSupportsThinking, + modelSupportsTools, +} from 
"core/llm/autodetect"; import { useRef } from "react"; import styled from "styled-components"; import { @@ -27,6 +31,7 @@ import { import { ToolTip } from "../gui/Tooltip"; import ModelSelect from "../modelSelection/ModelSelect"; import HoverItem from "./InputToolbar/HoverItem"; +import ToggleThinkingButton from "./InputToolbar/ToggleThinkingButton"; import ToggleToolsButton from "./InputToolbar/ToggleToolsButton"; const StyledDiv = styled.div<{ isHidden?: boolean }>` @@ -91,6 +96,14 @@ function InputToolbar(props: InputToolbarProps) { const isEditModeAndNoCodeToEdit = isInEditMode && !hasCodeToEdit; const isEnterDisabled = props.disabled || isEditModeAndNoCodeToEdit; const toolsSupported = defaultModel && modelSupportsTools(defaultModel); + const thinkingSupported = + defaultModel && + modelSupportsThinking( + defaultModel.provider, + defaultModel.model, + defaultModel.title, + defaultModel.capabilities, + ); const supportsImages = defaultModel && @@ -106,123 +119,123 @@ function InputToolbar(props: InputToolbarProps) { -
- -
- {props.toolbarOptions?.hideImageUpload || - (supportsImages && ( - <> - { - const files = e.target?.files ?? []; - for (const file of files) { - props.onImageFileSelected?.(file); - } +
+ {props.toolbarOptions?.hideImageUpload || + (supportsImages && ( + <> + { + const files = e.target?.files ?? []; + for (const file of files) { + props.onImageFileSelected?.(file); + } + }} + /> + + { + fileInputRef.current?.click(); }} /> - - { - fileInputRef.current?.click(); - }} - /> - - Attach an image + + Attach an image + + + + ))} + {props.toolbarOptions?.hideAddContext || ( + + + + + Add context (files, docs, urls, etc.) + + + )} + + + +
+
+ +
+ {!props.toolbarOptions?.hideUseCodebase && !isInEditMode && ( +
+ {props.activeKey === "Alt" ? ( + + {`${getAltKeyLabel()}⏎ + ${useActiveFile ? "No active file" : "Active file"}`} + + ) : ( + + props.onEnter?.({ + useCodebase: true, + noContext: !useActiveFile, + }) + } + > + + {getMetaKeyLabel()}⏎ @codebase + + + Submit with the codebase as context ({getMetaKeyLabel()}⏎) - - ))} - {props.toolbarOptions?.hideAddContext || ( - - + )} +
+ )} - - Add context (files, docs, urls, etc.) - + {isInEditMode && ( + { + await dispatch( + loadLastSession({ + saveCurrentSession: false, + }), + ); + dispatch(exitEditMode()); + }} + > + + Esc to exit + )} - -
-
- -
- {!props.toolbarOptions?.hideUseCodebase && !isInEditMode && ( - - )} - - {isInEditMode && ( - { - await dispatch( - loadLastSession({ - saveCurrentSession: false, - }), - ); - dispatch(exitEditMode()); + if (props.onEnter) { + props.onEnter({ + useCodebase: isMetaEquivalentKeyPressed(e as any), + noContext: useActiveFile ? e.altKey : !e.altKey, + }); + } }} + disabled={isEnterDisabled} > - - Esc to exit + + ⏎ {props.toolbarOptions?.enterText ?? "Enter"} - - )} - - { - if (props.onEnter) { - props.onEnter({ - useCodebase: isMetaEquivalentKeyPressed(e as any), - noContext: useActiveFile ? e.altKey : !e.altKey, - }); - } - }} - disabled={isEnterDisabled} - > - - ⏎ {props.toolbarOptions?.enterText ?? "Enter"} - - - + + +
diff --git a/gui/src/components/mainInput/InputToolbar/PopoverNoMoveTransition.tsx b/gui/src/components/mainInput/InputToolbar/PopoverNoMoveTransition.tsx new file mode 100644 index 0000000000..8c7909c223 --- /dev/null +++ b/gui/src/components/mainInput/InputToolbar/PopoverNoMoveTransition.tsx @@ -0,0 +1,26 @@ +import { Transition } from "@headlessui/react"; + +export default function PopoverNoMoveTransition({ + children, + show, + afterLeave, +}: { + children: React.ReactNode; + show?: boolean; + afterLeave?: () => void; +}) { + return ( + + {children} + + ); +} diff --git a/gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx b/gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx new file mode 100644 index 0000000000..8ca0ccc31f --- /dev/null +++ b/gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx @@ -0,0 +1,301 @@ +import { Listbox } from "@headlessui/react"; +import { + EllipsisHorizontalCircleIcon as EllipsisHorizontalIcon, + LightBulbIcon as LightBulbIconOutline, +} from "@heroicons/react/24/outline"; +import { LightBulbIcon as LightBulbIconSolid } from "@heroicons/react/24/solid"; +import { useEffect, useRef, useState } from "react"; +import { useDispatch } from "react-redux"; +import { lightGray, vscForeground } from "../.."; +import { useAppSelector } from "../../../redux/hooks"; +import { selectDefaultModel } from "../../../redux/slices/configSlice"; +import { selectIsInEditMode } from "../../../redux/slices/sessionSlice"; +import { + setAnthropicBudgetTokens, + setOpenAIReasoningEffort, + toggleUseThinking, +} from "../../../redux/slices/uiSlice"; +import { ToolTip } from "../../gui/Tooltip"; +import InfoHover from "../../InfoHover"; +import HoverItem from "./HoverItem"; +import PopoverTransition from "./PopoverNoMoveTransition"; + +interface ThinkingButtonProps { + disabled: boolean; +} + +export default function ToggleThinkingButton(props: ThinkingButtonProps) { + const buttonRef = useRef(null); + const dispatch = useDispatch(); + const [isDropdownOpen, setDropdownOpen] = useState(false); + const [isHovered, setIsHovered] = useState(false); + const [isThinkingHovered, setIsThinkingHovered] = useState(false); + + const useThinking = useAppSelector((state) => state.ui.useThinking); + const thinkingSettings = useAppSelector((state) => state.ui.thinkingSettings); + const defaultModel = useAppSelector(selectDefaultModel); + const [showAbove, setShowAbove] = useState(false); + const isInEditMode = useAppSelector(selectIsInEditMode); + + const ThinkingIcon = useThinking ? 
LightBulbIconSolid : LightBulbIconOutline; + + // Get provider from default model + const provider = defaultModel?.provider || ""; + const hasThinkingOptions = provider !== "deepseek"; + const maxTokens = defaultModel?.completionOptions?.maxTokens || 8192; + const minTokens = 1024; + const maxBudgetTokens = Math.max(1024, maxTokens - 1024); // Leave room for response + + useEffect(() => { + const checkPosition = () => { + if (buttonRef.current) { + const rect = buttonRef.current.getBoundingClientRect(); + const windowHeight = window.innerHeight; + const spaceBelow = windowHeight - rect.bottom; + setShowAbove(spaceBelow < 250); + } + }; + + if (isDropdownOpen) { + checkPosition(); + } + }, [isDropdownOpen]); + + const alwaysThinking = + defaultModel?.model?.includes("deepseek-reasoner") || + defaultModel?.model?.includes("o1") || + defaultModel?.model?.includes("o3-mini"); + const isDisabled = props.disabled || isInEditMode; + + const handleToggleThinking = () => { + if (isDisabled) return; + if (alwaysThinking && useThinking) return; // Prevent turning off for special models + dispatch(toggleUseThinking()); + }; + + const formatTokens = (tokens: number) => { + if (tokens >= 1000) { + return `${Math.round(tokens / 1000)}K`; + } + return tokens.toString(); + }; + + // Create a ref to access the Listbox.Button + const listboxButtonRef = useRef(null); + + // Function to close dropdown by clicking the button + const closeDropdown = () => { + if (listboxButtonRef.current) { + listboxButtonRef.current.click(); + } + }; + + // Get OpenAI icon color based on reasoning effort + const getIconColor = () => { + if (provider === "openai" && useThinking) { + const effort = thinkingSettings.openai.reasoningEffort; + if (effort === "low") return "text-red-400"; + if (effort === "medium") return "text-yellow-400"; + if (effort === "high") return "text-green-400"; + } + return "text-gray-400"; + }; + + const iconColorClass = getIconColor(); + + return ( + +
!isDisabled && setIsHovered(true)} + onMouseLeave={() => { + !isDisabled && setIsHovered(false); + setIsThinkingHovered(false); + }} + > + setIsThinkingHovered(true)} + className={`h-4 w-4 ${iconColorClass} ${ + isDisabled ? "cursor-not-allowed" : "" + }`} + /> + {isDisabled && ( + + {isInEditMode + ? "Thinking not supported in edit mode" + : "This model does not support thinking"} + + )} + {!isDisabled && alwaysThinking && useThinking && ( + + Thinking can't be turned off for this model + + )} + {!useThinking && !isDisabled && !alwaysThinking && ( + + Enable thinking + + )} + + {useThinking && !isDisabled && ( + <> + setIsThinkingHovered(true)} + className="hidden align-top sm:flex" + > + Thinking + + {}} + as="div" + onClick={(e) => e.stopPropagation()} + disabled={isDisabled} + > + {({ open }) => ( + <> + {hasThinkingOptions && ( + { + buttonRef.current = el; + listboxButtonRef.current = el; + }} + onClick={(e) => { + e.stopPropagation(); + setDropdownOpen(!isDropdownOpen); + }} + className="text-lightgray flex cursor-pointer items-center border-none bg-transparent px-0 outline-none" + aria-disabled={isDisabled} + > + + + )} + setDropdownOpen(false)} + > + +
+
+ Thinking Settings{" "} + +

+ Control how much effort the model puts into + reasoning +

+ {provider === "anthropic" && ( +

+ Budget tokens: How many tokens to dedicate + to thinking +

+ )} + {provider === "openai" && ( +

+ Reasoning effort: Amount of effort to put + into reasoning +

+ )} +
+ } + /> +
+
+
+ {provider === "anthropic" && ( +
+
+ Budget Tokens + + {formatTokens( + thinkingSettings.anthropic.budgetTokens, + )} + /{formatTokens(maxTokens)} + +
+
+ 1K + { + dispatch( + setAnthropicBudgetTokens( + parseInt(e.target.value), + ), + ); + }} + className="h-1 w-full cursor-pointer appearance-none rounded-lg bg-gray-500 focus:outline-none focus:ring-1 focus:ring-gray-400" + /> + + {formatTokens(maxBudgetTokens)} + +
+
+ )} + {provider === "openai" && ( +
+ Reasoning Effort +
+ {["high", "medium", "low"].map((level) => ( +
{ + dispatch( + setOpenAIReasoningEffort( + level as "low" | "medium" | "high", + ), + ); + closeDropdown(); + setIsHovered(false); + }} + > +
+ + {level} + +
+ ))} +
+
+ )} +
+ + + + )} + + + )} +
+ + ); +} diff --git a/gui/src/components/mainInput/InputToolbar/ToggleToolsButton.tsx b/gui/src/components/mainInput/InputToolbar/ToggleToolsButton.tsx index 20dae91602..8da089d173 100644 --- a/gui/src/components/mainInput/InputToolbar/ToggleToolsButton.tsx +++ b/gui/src/components/mainInput/InputToolbar/ToggleToolsButton.tsx @@ -8,13 +8,13 @@ import { useEffect, useRef, useState } from "react"; import { useDispatch } from "react-redux"; import { lightGray, vscForeground } from "../.."; import { useAppSelector } from "../../../redux/hooks"; +import { selectIsInEditMode } from "../../../redux/slices/sessionSlice"; import { toggleUseTools } from "../../../redux/slices/uiSlice"; import { ToolTip } from "../../gui/Tooltip"; import InfoHover from "../../InfoHover"; import HoverItem from "./HoverItem"; -import PopoverTransition from "./PopoverTransition"; +import PopoverTransition from "./PopoverNoMoveTransition"; import ToolDropdownItem from "./ToolDropdownItem"; -import { selectIsInEditMode } from "../../../redux/slices/sessionSlice"; interface ToolDropdownProps { disabled: boolean; @@ -56,16 +56,16 @@ export default function ToolDropdown(props: ToolDropdownProps) { !isDisabled && dispatch(toggleUseTools())}>
!isDisabled && setIsHovered(true)} + onMouseLeave={() => !isDisabled && setIsHovered(false)} > !isDisabled && setIsHovered(true)} - onMouseLeave={() => !isDisabled && setIsHovered(false)} /> {isDisabled && ( @@ -83,93 +83,93 @@ export default function ToolDropdown(props: ToolDropdownProps) { {useTools && !isDisabled && ( <> Tools - -
- {}} - as="div" - onClick={(e) => e.stopPropagation()} - disabled={isDisabled} - > - {({ open }) => ( - <> - { - e.stopPropagation(); - setDropdownOpen(!isDropdownOpen); - }} - className="text-lightgray flex cursor-pointer items-center border-none bg-transparent px-0 outline-none" - aria-disabled={isDisabled} - > - - - setDropdownOpen(false)} + {}} + as="div" + onClick={(e) => e.stopPropagation()} + disabled={isDisabled} + > + {({ open }) => ( + <> + { + e.stopPropagation(); + setDropdownOpen(!isDropdownOpen); + }} + className="text-lightgray flex cursor-pointer items-center border-none bg-transparent px-0 outline-none" + aria-disabled={isDisabled} + > + + + setDropdownOpen(false)} + > + - +
+ Tool policies{" "} + +

+ + Automatic: + {" "} + Can be used without asking +

+

+ + Allowed: + {" "} + Will ask before using +

+

+ + Disabled: + {" "} + Cannot be used +

+
+ } + /> +
+
+
-
-
( + - Tool policies{" "} - -

- - Automatic: - {" "} - Can be used without asking -

-

- - Allowed: - {" "} - Will ask before using -

-

- - Disabled: - {" "} - Cannot be used -

-
- } - /> -
-
-
- {availableTools.map((tool: any) => ( - - - - ))} -
- - - - )} - - + + + ))} + + + + + )} + )} diff --git a/gui/src/components/mainInput/InputToolbar/ToolDropdownItem.tsx b/gui/src/components/mainInput/InputToolbar/ToolDropdownItem.tsx index 5df66f147a..8bc47adaaf 100644 --- a/gui/src/components/mainInput/InputToolbar/ToolDropdownItem.tsx +++ b/gui/src/components/mainInput/InputToolbar/ToolDropdownItem.tsx @@ -26,7 +26,7 @@ function ToolDropdownItem(props: ToolDropdownItemProps) { return (
{ dispatch(toggleToolSetting(props.tool.function.name)); e.stopPropagation(); diff --git a/gui/src/components/modelSelection/ModelSelect.tsx b/gui/src/components/modelSelection/ModelSelect.tsx index 7cf854e4d0..7315d7dbb9 100644 --- a/gui/src/components/modelSelection/ModelSelect.tsx +++ b/gui/src/components/modelSelection/ModelSelect.tsx @@ -49,6 +49,8 @@ const StyledListboxButton = styled(Listbox.Button)` font-size: ${getFontSize() - 2}px; background: transparent; color: ${lightGray}; + width: 100%; + overflow: hidden; &:focus { outline: none; } @@ -306,7 +308,7 @@ function ModelSelect() { dispatch(setDefaultModel({ title: val })); }} > -
+
-
- +
+ {modelSelectTitle(defaultModel) || "Select model"}{" "} { + const dispatch = useAppDispatch(); + const defaultModel = useAppSelector(selectDefaultModel); + const lastModelRef = useRef(null); + + useEffect(() => { + // Skip if no model is selected + if (!defaultModel) return; + + // Generate a unique ID for this model + const modelId = `${defaultModel.provider}-${defaultModel.model}`; + + // Only update settings if model has changed + if (lastModelRef.current === modelId) return; + + // Update the lastModelRef to prevent future updates for the same model + lastModelRef.current = modelId; + + // Initialize Anthropic thinking settings from model config + if ( + defaultModel.provider === "anthropic" && + defaultModel.completionOptions?.thinking?.budget_tokens + ) { + dispatch( + setAnthropicBudgetTokens( + defaultModel.completionOptions.thinking.budget_tokens + ) + ); + } + + // Initialize OpenAI reasoning effort from model config + if ( + defaultModel.provider === "openai" && + defaultModel.completionOptions?.reasoning_effort + ) { + dispatch( + setOpenAIReasoningEffort( + defaultModel.completionOptions.reasoning_effort as "low" | "medium" | "high" + ) + ); + } + }, [defaultModel, dispatch]); +}; diff --git a/gui/src/pages/gui/Chat.tsx b/gui/src/pages/gui/Chat.tsx index 9fe39d41ed..d911a6cc7b 100644 --- a/gui/src/pages/gui/Chat.tsx +++ b/gui/src/pages/gui/Chat.tsx @@ -387,54 +387,60 @@ export function Chat() { contextItems={item.contextItems} toolCallId={item.message.toolCallId} /> - ) : item.message.role === "assistant" && - item.message.toolCalls && - item.toolCallState ? ( -
- {item.message.toolCalls?.map((toolCall, i) => { - return ( -
- -
- ); - })} -
) : ( -
- - ) : false ? ( - + {item.message.role === "assistant" && ( +
+ + ) : false ? ( + + ) : ( + + ) + } + open={ + typeof stepsOpen[index] === "undefined" + ? false + ? false + : true + : stepsOpen[index]! + } + onToggle={() => {}} + > + - ) : ( - - ) - } - open={ - typeof stepsOpen[index] === "undefined" - ? false - ? false - : true - : stepsOpen[index]! - } - onToggle={() => {}} - > - - -
+
+
+ )} + {item.message.role === "assistant" && + item.message.toolCalls && + item.toolCallState && ( +
+ {item.message.toolCalls?.map((toolCall, i) => ( +
+ +
+ ))} +
+ )} + )}
diff --git a/gui/src/redux/slices/sessionSlice.ts b/gui/src/redux/slices/sessionSlice.ts index be4d3aa774..9dc7256097 100644 --- a/gui/src/redux/slices/sessionSlice.ts +++ b/gui/src/redux/slices/sessionSlice.ts @@ -18,6 +18,7 @@ import { PromptLog, Session, SessionMetadata, + ThinkingMessagePart, ToolCallDelta, ToolCallState, } from "core"; @@ -183,7 +184,6 @@ export const sessionSlice = createSlice({ }>, ) => { const { index, editorState } = payload; - if (state.history.length && index < state.history.length) { // Resubmission - update input message, truncate history after resubmit with new empty response message if (index % 2 === 1) { @@ -315,26 +315,23 @@ export const sessionSlice = createSlice({ parsedArgs, }; } - for (const message of action.payload) { const lastItem = state.history[state.history.length - 1]; const lastMessage = lastItem.message; + // Simplified condition to keep thinking blocks and tool calls together in the same message + // Only create a new message when: + // 1. There is no previous message + // 2. Roles are different (e.g., user vs assistant) + // 3. For tool role messages, always create new ones if ( + !lastItem || lastMessage.role !== message.role || - // This is for when a tool call comes immediately before/after tool call - (lastMessage.role === "assistant" && - message.role === "assistant" && - // Last message isn't completely new - !(!lastMessage.toolCalls?.length && !lastMessage.content) && - // And there's a difference in tool call presence - (lastMessage.toolCalls?.length ?? 0) !== - (message.toolCalls?.length ?? 0)) + message.role === "tool" ) { - // Create a new message + // Create a new message - pass the message directly without modifying content const historyItem: ChatHistoryItemWithMessageId = { message: { ...message, - content: renderChatMessage(message), id: uuidv4(), }, contextItems: [], @@ -358,29 +355,216 @@ export const sessionSlice = createSlice({ } else { // Add to the existing message if (message.content) { - const messageContent = renderChatMessage(message); - if (messageContent.includes("")) { + // Check if the message content is an array with parts + if ( + Array.isArray(message.content) && + message.content.length > 0 + ) { + // Process each part in the array separately + let handledAllParts = true; + + for (const part of message.content) { + if (part.type === "thinking") { + // Initialize reasoning if it doesn't exist + if (!lastItem.reasoning) { + lastItem.reasoning = { + startAt: Date.now(), + active: true, + text: "", + endAt: undefined, + }; + } + + // Check if this is a completion signal for thinking (signature present) + if (part.signature) { + // Add signature to the thinking part + if (Array.isArray(lastMessage.content)) { + let thinkingPart = lastMessage.content.find( + (p) => p.type === "thinking", + ); + if (thinkingPart) { + (thinkingPart as ThinkingMessagePart).signature = + part.signature; + } else { + lastMessage.content.push(part); + } + } else { + lastMessage.content = [part]; + } + // Mark thinking as complete + if (lastItem.reasoning) { + lastItem.reasoning.active = false; + lastItem.reasoning.endAt = Date.now(); + } + } else if (part.thinking) { + if (Array.isArray(lastMessage.content)) { + // Append thinking delta to the last thinking part + let thinkingPart = lastMessage.content.find( + (p) => p.type === "thinking", + ); + if (thinkingPart) { + (thinkingPart as ThinkingMessagePart).thinking += + part.thinking; + } else { + lastMessage.content.push(part); + } + } else { + lastMessage.content = [part]; + } + // 
Append the thinking delta to the reasoning text + lastItem.reasoning.text += part.thinking; + } + continue; + } else if (part.type === "redacted_thinking") { + // Initialize reasoning if it doesn't exist + if (!lastItem.reasoning) { + lastItem.reasoning = { + startAt: Date.now(), + active: true, + text: "", + endAt: undefined, + }; + } + + // Add a placeholder for redacted thinking + lastItem.reasoning.text = + "[Some thinking content has been redacted for safety reasons]"; + + // Mark thinking as complete if it's been redacted + lastItem.reasoning.active = false; + lastItem.reasoning.endAt = Date.now(); + + // IMPORTANT: Preserve ALL redacted_thinking parts in the message content array + if (Array.isArray(lastMessage.content)) { + lastMessage.content.push(part); + } else { + lastMessage.content = [part]; + } + continue; + } else if (part.type === "text") { + // For text parts, add directly to the message content + if ( + typeof lastMessage.content === "string" && + lastMessage.content.length > 0 + ) { + lastMessage.content += part.text; + } else if (Array.isArray(lastMessage.content)) { + // Find existing text part or add a new one + const textPart = lastMessage.content.find( + (p) => p.type === "text", + ); + if (textPart && textPart.type === "text") { + textPart.text += part.text; + } else { + lastMessage.content.push({ + type: "text", + text: part.text, + }); + } + } else { + // Initialize with an array containing this text part + lastMessage.content = [{ type: "text", text: part.text }]; + } + continue; + } else { + handledAllParts = false; + } + } + + // Only if we couldn't handle all parts, fall back to the default handling + if (handledAllParts) { + continue; + } + + // For other content types, use renderChatMessage + const messageContent = renderChatMessage(message); + if (typeof lastMessage.content === "string") { + lastMessage.content += messageContent; + } + } else { + // Reasoning is streamed before the regular content, so if we had any, end it. 
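// Illustrative DeepSeek R1-style text stream that this string branch parses
// (the sample text is hypothetical):
//   "<think>Compare both options first...</think>Option B looks better."
// The <think> span is copied into lastItem.reasoning for the reasoning UI,
// while renderChatMessage/stripThinking removes it from the displayed text.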
+ if (lastItem.reasoning) { + lastItem.reasoning.active = false; + lastItem.reasoning.endAt = Date.now(); + } + + // Handle string content or legacy format + const messageContent = renderChatMessage(message); + + // Make sure message content is a string + if (typeof lastMessage.content !== "string") { + lastMessage.content = ""; + } + + // Add the message content to the existing content + lastMessage.content += messageContent; + + // Current full content + const fullContent = lastMessage.content as string; + + // If we find tags, extract the content for the reasoning field + if ( + fullContent.includes("") && + fullContent.includes("") + ) { + // Extract content between and + const thinkMatches = fullContent.match( + /(.*?)<\/think>/s, + ); + + if (thinkMatches && thinkMatches[1]) { + // Initialize or update the reasoning + if (!lastItem.reasoning) { + lastItem.reasoning = { + startAt: Date.now(), + active: false, // Set to false since we have a complete thinking block + text: thinkMatches[1], + endAt: Date.now(), + }; + } else { + // Update existing reasoning with complete content + lastItem.reasoning.text = thinkMatches[1]; + lastItem.reasoning.active = false; + lastItem.reasoning.endAt = Date.now(); + } + } + } else if ( + fullContent.includes("") && + !fullContent.includes("") + ) { + // We have an incomplete thinking block + // Initialize reasoning if we don't have one yet + if (!lastItem.reasoning) { + lastItem.reasoning = { + startAt: Date.now(), + active: true, + text: "", + endAt: undefined, + }; + } + + // Extract content after tag for reasoning field + const afterThinkTag = fullContent.split("")[1]; + if (afterThinkTag) { + lastItem.reasoning.text = afterThinkTag; + } + } + } + } else if ( + message.role === "assistant" && + message.reasoning_content + ) { + // Initialize reasoning if it doesn't exist + if (!lastItem.reasoning) { lastItem.reasoning = { startAt: Date.now(), active: true, - text: messageContent.replace("", "").trim(), + text: message.reasoning_content, + endAt: undefined, }; - } else if ( - lastItem.reasoning?.active && - messageContent.includes("") - ) { - const [reasoningEnd, answerStart] = - messageContent.split(""); - lastItem.reasoning.text += reasoningEnd.trimEnd(); - lastItem.reasoning.active = false; - lastItem.reasoning.endAt = Date.now(); - lastMessage.content += answerStart.trimStart(); - } else if (lastItem.reasoning?.active) { - lastItem.reasoning.text += messageContent; } else { - // Note this only works because new message above - // was already rendered from parts to string - lastMessage.content += messageContent; + // Append to existing reasoning + lastItem.reasoning.text += message.reasoning_content; } } else if ( message.role === "assistant" && @@ -473,9 +657,9 @@ export const sessionSlice = createSlice({ state.allSessionMetadata = state.allSessionMetadata.map((session) => session.sessionId === payload.sessionId ? 
{ - ...session, - ...payload, - } + ...session, + ...payload, + } : session, ); if (payload.title && payload.sessionId === state.id) { @@ -510,8 +694,9 @@ export const sessionSlice = createSlice({ payload.rangeInFileWithContents.filepath, ); - const lineNums = `(${payload.rangeInFileWithContents.range.start.line + 1 - }-${payload.rangeInFileWithContents.range.end.line + 1})`; + const lineNums = `(${ + payload.rangeInFileWithContents.range.start.line + 1 + }-${payload.rangeInFileWithContents.range.end.line + 1})`; contextItems.push({ name: `${fileName} ${lineNums}`, @@ -696,9 +881,9 @@ function addPassthroughCases( ) { thunks.forEach((thunk) => { builder - .addCase(thunk.fulfilled, (state, action) => { }) - .addCase(thunk.rejected, (state, action) => { }) - .addCase(thunk.pending, (state, action) => { }); + .addCase(thunk.fulfilled, (state, action) => {}) + .addCase(thunk.rejected, (state, action) => {}) + .addCase(thunk.pending, (state, action) => {}); }); } diff --git a/gui/src/redux/slices/uiSlice.ts b/gui/src/redux/slices/uiSlice.ts index 8b03d30f34..18c9f93135 100644 --- a/gui/src/redux/slices/uiSlice.ts +++ b/gui/src/redux/slices/uiSlice.ts @@ -21,6 +21,16 @@ type UIState = { hasDismissedExploreDialog: boolean; shouldAddFileForEditing: boolean; useTools: boolean; + useThinking: boolean; // New toggle for thinking + thinkingSettings: { + // Settings for different providers + anthropic: { + budgetTokens: number; // Min 1024, max is below maxTokens + }; + openai: { + reasoningEffort: "low" | "medium" | "high"; + }; + }; toolSettings: { [toolName: string]: ToolSetting }; ttsActive: boolean; }; @@ -41,6 +51,15 @@ export const uiSlice = createSlice({ shouldAddFileForEditing: false, ttsActive: false, useTools: false, + useThinking: false, + thinkingSettings: { + anthropic: { + budgetTokens: 4096, // Default reasonable value (half of typical 8K max) + }, + openai: { + reasoningEffort: "medium", // Default value + }, + }, toolSettings: { [BuiltInToolNames.ReadFile]: "allowedWithoutPermission", [BuiltInToolNames.CreateNewFile]: "allowedWithPermission", @@ -109,6 +128,21 @@ export const uiSlice = createSlice({ break; } }, + // Thinking Controls + toggleUseThinking: (state) => { + state.useThinking = !state.useThinking; + }, + setAnthropicBudgetTokens: (state, action: PayloadAction) => { + state.useThinking = true; + state.thinkingSettings.anthropic.budgetTokens = action.payload; + }, + setOpenAIReasoningEffort: ( + state, + action: PayloadAction<"low" | "medium" | "high">, + ) => { + state.useThinking = true; + state.thinkingSettings.openai.reasoningEffort = action.payload; + }, setTTSActive: (state, { payload }: PayloadAction) => { state.ttsActive = payload; }, @@ -125,6 +159,9 @@ export const { toggleUseTools, toggleToolSetting, addTool, + toggleUseThinking, + setAnthropicBudgetTokens, + setOpenAIReasoningEffort, setTTSActive, } = uiSlice.actions; diff --git a/gui/src/redux/thunks/streamNormalInput.ts b/gui/src/redux/thunks/streamNormalInput.ts index 036ef44318..f240787603 100644 --- a/gui/src/redux/thunks/streamNormalInput.ts +++ b/gui/src/redux/thunks/streamNormalInput.ts @@ -1,5 +1,6 @@ import { createAsyncThunk, unwrapResult } from "@reduxjs/toolkit"; -import { ChatMessage, PromptLog } from "core"; +import { ChatMessage, LLMFullCompletionOptions } from "core"; +import { modelSupportsThinking, modelSupportsTools } from "core/llm/autodetect"; import { selectCurrentToolCall } from "../selectors/selectCurrentToolCall"; import { selectDefaultModel } from "../slices/configSlice"; import { @@ 
 } from "../slices/sessionSlice";
 import { ThunkApiType } from "../store";
 import { callTool } from "./callTool";
-import { modelSupportsTools } from "core/llm/autodetect";

 export const streamNormalInput = createAsyncThunk<
   void,
@@ -32,18 +32,53 @@ export const streamNormalInput = createAsyncThunk<
       modelSupportsTools(defaultModel) &&
       state.session.mode === "chat";

+    // Prepare options
+    const options: LLMFullCompletionOptions = {};
+
+    // Add tools if supported
+    if (includeTools) {
+      options.tools = state.config.config.tools.filter(
+        (tool) => toolSettings[tool.function.name] !== "disabled",
+      );
+    }
+
+    // Add thinking options based on UI settings
+    const useThinking = state.ui.useThinking;
+    const thinkingSettings = state.ui.thinkingSettings;
+
+    if (
+      useThinking &&
+      modelSupportsThinking(
+        defaultModel.provider,
+        defaultModel.model,
+        defaultModel.title,
+        defaultModel.capabilities,
+      )
+    ) {
+      // For Anthropic models
+      if (defaultModel.provider === "anthropic") {
+        options.thinking = {
+          type: "enabled",
+          budget_tokens: thinkingSettings.anthropic.budgetTokens,
+        };
+      }
+      // For OpenAI models
+      else if (defaultModel.provider === "openai") {
+        options.reasoning_effort = thinkingSettings.openai.reasoningEffort;
+      }
+    } else {
+      // Otherwise explicitly disable thinking (toggled off in the UI or
+      // unsupported by the model)
+      options.thinking = {
+        type: "disabled",
+      };
+    }
+
     // Send request
     const gen = extra.ideMessenger.llmStreamChat(
       defaultModel.title,
       streamAborter.signal,
       messages,
-      includeTools
-        ? {
-            tools: state.config.config.tools.filter(
-              (tool) => toolSettings[tool.function.name] !== "disabled",
-            ),
-          }
-        : {},
+      options,
     );

     // Stream response

From a7a153c2c084d05a3e9ff05f5f267dd7b7e8dfa0 Mon Sep 17 00:00:00 2001
From: FallDownTheSystem <8807171+FallDownTheSystem@users.noreply.github.com>
Date: Mon, 10 Mar 2025 09:41:04 +0200
Subject: [PATCH 2/3] Remove unused code

Fix thinking icon color not switching back to gray

---
 core/llm/llms/Anthropic.ts                         | 36 +++++++------------
 .../mainInput/InputToolbar/ToggleThinkingButton.tsx |  2 +-
 2 files changed, 13 insertions(+), 25 deletions(-)

diff --git a/core/llm/llms/Anthropic.ts b/core/llm/llms/Anthropic.ts
index 38fb05a35a..8bd3158853 100644
--- a/core/llm/llms/Anthropic.ts
+++ b/core/llm/llms/Anthropic.ts
@@ -39,9 +39,9 @@ class Anthropic extends BaseLLM {
       })),
       tool_choice: options.toolChoice
         ? {
-          type: "tool",
-          name: options.toolChoice.function.name,
-        }
+            type: "tool",
+            name: options.toolChoice.function.name,
+          }
         : undefined,
       thinking: options.thinking,
     };
@@ -133,28 +133,16 @@ class Anthropic extends BaseLLM {
       return chatMessage;
     }

-    // Filter out empty thinking blocks before mapping
-    const filteredContent = Array.isArray(message.content)
-      ? message.content.filter(
-          (part) =>
-            !(
-              part.type === "thinking" &&
-              (!part.thinking || part.thinking.trim() === "") &&
-              (!part.signature || part.signature.trim() === "")
-            ),
-        )
-      : message.content;
-
     const convertedContent = (
-      Array.isArray(filteredContent) ? filteredContent : [filteredContent]
+      Array.isArray(message.content) ? message.content : [message.content]
     ).map((part, contentIdx) => {
       if (part.type === "text") {
         const newpart = {
           ...part,
           // If multiple text parts, only add cache_control to the last one
           ...(addCaching &&
-            contentIdx ==
-              (Array.isArray(filteredContent) ? filteredContent.length : 1) - 1
+          contentIdx ==
+            (Array.isArray(message.content) ? message.content.length : 1) - 1
            ? { cache_control: { type: "ephemeral" } }
            : {}),
        };
@@ -270,12 +258,12 @@ class Anthropic extends BaseLLM {
       messages: msgs,
       system: shouldCacheSystemMessage
         ? [
-          {
-            type: "text",
-            text: this.systemMessage,
-            cache_control: { type: "ephemeral" },
-          },
-        ]
+            {
+              type: "text",
+              text: this.systemMessage,
+              cache_control: { type: "ephemeral" },
+            },
+          ]
         : systemMessage,
     };

diff --git a/gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx b/gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx
index 8ca0ccc31f..5b9edd50ec 100644
--- a/gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx
+++ b/gui/src/components/mainInput/InputToolbar/ToggleThinkingButton.tsx
@@ -92,7 +92,7 @@ export default function ToggleThinkingButton(props: ThinkingButtonProps) {

   // Get OpenAI icon color based on reasoning effort
   const getIconColor = () => {
-    if (provider === "openai" && useThinking) {
+    if (provider === "openai" && useThinking && !props.disabled) {
       const effort = thinkingSettings.openai.reasoningEffort;
       if (effort === "low") return "text-red-400";
       if (effort === "medium") return "text-yellow-400";

From 3b930d8273e724e1696a23f3dce4b4fe93677346 Mon Sep 17 00:00:00 2001
From: FallDownTheSystem <8807171+FallDownTheSystem@users.noreply.github.com>
Date: Thu, 13 Mar 2025 18:58:23 +0200
Subject: [PATCH 3/3] Improve default settings for thinking

---
 gui/src/hooks/useModelThinkingSettings.ts | 36 ++++++++++++++++-------
 gui/src/redux/slices/uiSlice.ts           |  6 ++--
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/gui/src/hooks/useModelThinkingSettings.ts b/gui/src/hooks/useModelThinkingSettings.ts
index 31ccb4b93c..c19cf682ce 100644
--- a/gui/src/hooks/useModelThinkingSettings.ts
+++ b/gui/src/hooks/useModelThinkingSettings.ts
@@ -1,7 +1,7 @@
 import { useEffect, useRef } from 'react';
 import { useAppDispatch, useAppSelector } from '../redux/hooks';
 import { selectDefaultModel } from '../redux/slices/configSlice';
-import { setAnthropicBudgetTokens, setOpenAIReasoningEffort } from '../redux/slices/uiSlice';
+import { setAnthropicBudgetTokens, setOpenAIReasoningEffort, setUseThinking } from '../redux/slices/uiSlice';

 /**
  * This hook initializes thinking settings when the model changes.
@@ -12,42 +12,58 @@ export const useModelThinkingSettings = () => {
   const dispatch = useAppDispatch();
   const defaultModel = useAppSelector(selectDefaultModel);
   const lastModelRef = useRef<string | null>(null);
-  
+
   useEffect(() => {
     // Skip if no model is selected
     if (!defaultModel) return;
-    
+
     // Generate a unique ID for this model
     const modelId = `${defaultModel.provider}-${defaultModel.model}`;
-    
+
     // Only update settings if model has changed
     if (lastModelRef.current === modelId) return;
-    
+
     // Update the lastModelRef to prevent future updates for the same model
     lastModelRef.current = modelId;
-    
+
     // Initialize Anthropic thinking settings from model config
     if (
-      defaultModel.provider === "anthropic" && 
+      defaultModel.provider === "anthropic" &&
       defaultModel.completionOptions?.thinking?.budget_tokens
     ) {
+      dispatch(
+        setUseThinking(defaultModel.completionOptions.thinking.type === "enabled")
+      );
       dispatch(
         setAnthropicBudgetTokens(
           defaultModel.completionOptions.thinking.budget_tokens
         )
       );
     }
-    
+
     // Initialize OpenAI reasoning effort from model config
     if (
-      defaultModel.provider === "openai" && 
+      defaultModel.provider === "openai" &&
       defaultModel.completionOptions?.reasoning_effort
     ) {
+      dispatch(
+        setUseThinking(true)
+      );
       dispatch(
         setOpenAIReasoningEffort(
-          defaultModel.completionOptions.reasoning_effort as "low" | "medium" | "high"
+          defaultModel.completionOptions.reasoning_effort
         )
       );
     }
+
+    // Initialize DeepSeek to enable thinking by default
+    if (
+      defaultModel.provider === "deepseek" &&
+      defaultModel.model.includes("reasoner")
+    ) {
+      dispatch(
+        setUseThinking(true)
+      );
+    }
   }, [defaultModel, dispatch]);
 };

diff --git a/gui/src/redux/slices/uiSlice.ts b/gui/src/redux/slices/uiSlice.ts
index 18c9f93135..732ff24198 100644
--- a/gui/src/redux/slices/uiSlice.ts
+++ b/gui/src/redux/slices/uiSlice.ts
@@ -129,18 +129,19 @@ export const uiSlice = createSlice({
       }
     },
     // Thinking Controls
+    setUseThinking: (state, action: PayloadAction<boolean>) => {
+      state.useThinking = action.payload;
+    },
     toggleUseThinking: (state) => {
       state.useThinking = !state.useThinking;
     },
     setAnthropicBudgetTokens: (state, action: PayloadAction<number>) => {
-      state.useThinking = true;
       state.thinkingSettings.anthropic.budgetTokens = action.payload;
     },
     setOpenAIReasoningEffort: (
       state,
       action: PayloadAction<"low" | "medium" | "high">,
     ) => {
-      state.useThinking = true;
       state.thinkingSettings.openai.reasoningEffort = action.payload;
     },
     setTTSActive: (state, { payload }: PayloadAction<boolean>) => {
@@ -160,6 +161,7 @@ export const {
   toggleToolSetting,
   addTool,
   toggleUseThinking,
+  setUseThinking,
   setAnthropicBudgetTokens,
   setOpenAIReasoningEffort,
   setTTSActive,
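
For reference, the `<think>`-tag handling that the sessionSlice reducer in PATCH 1/3 performs on streamed DeepSeek R1 content can be reduced to a small pure function. The sketch below is illustrative only and is not part of the patch series; `ReasoningState` is a hypothetical stand-in for the reducer's `lastItem.reasoning` shape, and `extractReasoning` is a made-up helper name.

```typescript
// A minimal standalone sketch of the <think>-tag extraction above.
// ReasoningState mirrors the reducer's lastItem.reasoning shape (assumption).
interface ReasoningState {
  startAt: number;
  endAt?: number;
  active: boolean;
  text: string;
}

function extractReasoning(fullContent: string): ReasoningState | undefined {
  // Complete thinking block: capture everything between the two tags.
  const complete = fullContent.match(/<think>(.*?)<\/think>/s);
  if (complete && complete[1] !== undefined) {
    return {
      startAt: Date.now(),
      endAt: Date.now(),
      active: false, // block is closed, so reasoning is no longer streaming
      text: complete[1],
    };
  }
  // Opened but not yet closed: everything after <think> is reasoning so far.
  if (fullContent.includes("<think>")) {
    return {
      startAt: Date.now(),
      active: true,
      text: fullContent.split("<think>")[1] ?? "",
    };
  }
  // No thinking tags present in the content.
  return undefined;
}

// Example: extractReasoning("<think>plan the fix</think>Here is the answer")
// => { active: false, text: "plan the fix", ... }
```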
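Similarly, the per-provider option assembly in the streamNormalInput thunk can be summarized in isolation. This is a minimal sketch under the same UI state shape as the uiSlice above; `buildThinkingOptions`, `ThinkingSettings`, and `ThinkingOptions` are hypothetical names introduced here, not identifiers from the patch.

```typescript
// Hypothetical mirror of state.ui.thinkingSettings (assumption).
type ThinkingSettings = {
  anthropic: { budgetTokens: number };
  openai: { reasoningEffort: "low" | "medium" | "high" };
};

// Hypothetical subset of the completion options the thunk fills in.
type ThinkingOptions = {
  thinking?: { type: "enabled" | "disabled"; budget_tokens?: number };
  reasoning_effort?: "low" | "medium" | "high";
};

function buildThinkingOptions(
  provider: string,
  useThinking: boolean,
  supported: boolean, // result of modelSupportsThinking(...)
  settings: ThinkingSettings,
): ThinkingOptions {
  if (useThinking && supported) {
    if (provider === "anthropic") {
      // Anthropic takes an explicit token budget (the UI enforces a 1024 minimum).
      return {
        thinking: {
          type: "enabled",
          budget_tokens: settings.anthropic.budgetTokens,
        },
      };
    }
    if (provider === "openai") {
      // OpenAI reasoning models take a coarse effort level instead of a budget.
      return { reasoning_effort: settings.openai.reasoningEffort };
    }
    // Other supported providers (e.g. deepseek-reasoner) always think; no flag needed.
    return {};
  }
  // Otherwise thinking is explicitly disabled.
  return { thinking: { type: "disabled" } };
}

// Example: an Anthropic model with the default 4096-token budget
// => { thinking: { type: "enabled", budget_tokens: 4096 } }
console.log(
  buildThinkingOptions("anthropic", true, true, {
    anthropic: { budgetTokens: 4096 },
    openai: { reasoningEffort: "medium" },
  }),
);
```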