Skip to content

Commit 77c4ff3

Browse files
committed
feat: better support for reasoning/thinking
1 parent 985bf34 commit 77c4ff3

File tree

3 files changed

+88
-14
lines changed

3 files changed

+88
-14
lines changed

chatlas/_provider_anthropic.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
ContentJson,
1818
ContentPDF,
1919
ContentText,
20+
ContentThinking,
2021
ContentToolRequest,
2122
ContentToolResult,
2223
ContentToolResultImage,
@@ -41,6 +42,8 @@
4142
MessageParam,
4243
RawMessageStreamEvent,
4344
TextBlock,
45+
ThinkingBlock,
46+
ThinkingBlockParam,
4447
ToolParam,
4548
ToolUseBlock,
4649
)
@@ -50,6 +53,7 @@
5053
from anthropic.types.messages.batch_create_params import Request as BatchRequest
5154
from anthropic.types.model_param import ModelParam
5255
from anthropic.types.text_block_param import TextBlockParam
56+
from anthropic.types.thinking_config_enabled_param import ThinkingConfigEnabledParam
5357
from anthropic.types.tool_result_block_param import ToolResultBlockParam
5458
from anthropic.types.tool_use_block_param import ToolUseBlockParam
5559

@@ -61,6 +65,7 @@
6165
ToolUseBlockParam,
6266
ToolResultBlockParam,
6367
DocumentBlockParam,
68+
ThinkingBlockParam,
6469
]
6570
else:
6671
Message = object
@@ -71,8 +76,9 @@ def ChatAnthropic(
7176
*,
7277
system_prompt: Optional[str] = None,
7378
model: "Optional[ModelParam]" = None,
74-
api_key: Optional[str] = None,
7579
max_tokens: int = 4096,
80+
reasoning: Optional["int | ThinkingConfigEnabledParam"] = None,
81+
api_key: Optional[str] = None,
7682
kwargs: Optional["ChatClientArgs"] = None,
7783
) -> Chat["SubmitInputArgs", Message]:
7884
"""
@@ -119,12 +125,19 @@ def ChatAnthropic(
119125
The model to use for the chat. The default, None, will pick a reasonable
120126
default, and warn you about it. We strongly recommend explicitly
121127
choosing a model for all but the most casual use.
128+
max_tokens
129+
Maximum number of tokens to generate before stopping.
130+
reasoning
131+
Determines how many tokens Claude may use for reasoning. Must be
132+
≥1024 and less than `max_tokens`. Larger budgets can enable more
133+
thorough analysis for complex problems, improving response quality. See
134+
[extended
135+
thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
136+
for details.
122137
api_key
123138
The API key to use for authentication. You generally should not supply
124139
this directly, but instead set the `ANTHROPIC_API_KEY` environment
125140
variable.
126-
max_tokens
127-
Maximum number of tokens to generate before stopping.
128141
kwargs
129142
Additional arguments to pass to the `anthropic.Anthropic()` client
130143
constructor.
@@ -174,6 +187,12 @@ def ChatAnthropic(
174187
if model is None:
175188
model = log_model_default("claude-sonnet-4-0")
176189

190+
kwargs_chat: "SubmitInputArgs" = {}
191+
if reasoning is not None:
192+
if isinstance(reasoning, int):
193+
reasoning = {"type": "enabled", "budget_tokens": reasoning}
194+
kwargs_chat = {"thinking": reasoning}
195+
177196
return Chat(
178197
provider=AnthropicProvider(
179198
api_key=api_key,
@@ -182,6 +201,7 @@ def ChatAnthropic(
182201
kwargs=kwargs,
183202
),
184203
system_prompt=system_prompt,
204+
kwargs_chat=kwargs_chat,
185205
)
186206

187207

@@ -396,6 +416,12 @@ def stream_merge_chunks(self, completion, chunk):
396416
if not isinstance(this_content.input, str):
397417
this_content.input = "" # type: ignore
398418
this_content.input += json_delta # type: ignore
419+
elif chunk.delta.type == "thinking_delta":
420+
this_content = cast("ThinkingBlock", this_content)
421+
this_content.thinking += chunk.delta.thinking
422+
elif chunk.delta.type == "signature_delta":
423+
this_content = cast("ThinkingBlock", this_content)
424+
this_content.signature += chunk.delta.signature
399425
elif chunk.type == "content_block_stop":
400426
this_content = completion.content[chunk.index]
401427
if this_content.type == "tool_use" and isinstance(this_content.input, str):
@@ -588,6 +614,13 @@ def _as_content_block(content: Content) -> "ContentBlockParam":
588614
res["content"] = content.get_model_value() # type: ignore
589615

590616
return res
617+
elif isinstance(content, ContentThinking):
618+
extra = content.extra or {}
619+
return {
620+
"type": "thinking",
621+
"thinking": content.thinking,
622+
"signature": extra.get("signature", ""),
623+
}
591624

592625
raise ValueError(f"Unknown content type: {type(content)}")
593626

@@ -641,6 +674,13 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn:
641674
arguments=content.input,
642675
)
643676
)
677+
elif content.type == "thinking":
678+
contents.append(
679+
ContentThinking(
680+
thinking=content.thinking,
681+
extra={"signature": content.signature},
682+
)
683+
)
644684

645685
return Turn(
646686
"assistant",

chatlas/_provider_google.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
GenerateContentResponseDict,
3535
Part,
3636
PartDict,
37+
ThinkingConfigDict,
3738
)
3839

3940
from .types.google import ChatClientArgs, SubmitInputArgs
@@ -45,6 +46,7 @@ def ChatGoogle(
4546
*,
4647
system_prompt: Optional[str] = None,
4748
model: Optional[str] = None,
49+
reasoning: Optional["int | ThinkingConfigDict"] = None,
4850
api_key: Optional[str] = None,
4951
kwargs: Optional["ChatClientArgs"] = None,
5052
) -> Chat["SubmitInputArgs", GenerateContentResponse]:
@@ -86,6 +88,10 @@ def ChatGoogle(
8688
The model to use for the chat. The default, None, will pick a reasonable
8789
default, and warn you about it. We strongly recommend explicitly choosing
8890
a model for all but the most casual use.
91+
reasoning
92+
If provided, enables reasoning (a.k.a. "thoughts") in the model's
93+
responses. This can be an integer number of tokens to use for reasoning,
94+
or a full `ThinkingConfigDict` to customize the reasoning behavior.
8995
api_key
9096
The API key to use for authentication. You generally should not supply
9197
this directly, but instead set the `GOOGLE_API_KEY` environment variable.
@@ -137,14 +143,20 @@ def ChatGoogle(
137143
if model is None:
138144
model = log_model_default("gemini-2.5-flash")
139145

146+
kwargs_chat: "SubmitInputArgs" = {}
147+
if reasoning is not None:
148+
if isinstance(reasoning, int):
149+
reasoning = {"thinking_budget": reasoning, "include_thoughts": True}
150+
kwargs_chat["config"] = {"thinking_config": reasoning}
151+
140152
return Chat(
141153
provider=GoogleProvider(
142154
model=model,
143155
api_key=api_key,
144-
name="Google/Gemini",
145156
kwargs=kwargs,
146157
),
147158
system_prompt=system_prompt,
159+
kwargs_chat=kwargs_chat,
148160
)
149161

150162

@@ -367,7 +379,7 @@ def value_tokens(self, completion):
367379
cached = usage.cached_content_token_count or 0
368380
return (
369381
(usage.prompt_token_count or 0) - cached,
370-
usage.candidates_token_count or 0,
382+
(usage.candidates_token_count or 0) + (usage.thoughts_token_count or 0),
371383
usage.cached_content_token_count or 0,
372384
)
373385

chatlas/_provider_openai.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
)
3636
from openai.types.responses.easy_input_message_param import EasyInputMessageParam
3737
from openai.types.responses.tool_param import ToolParam
38+
from openai.types.shared.reasoning_effort import ReasoningEffort
39+
from openai.types.shared_params.reasoning import Reasoning
3840
from openai.types.shared_params.responses_model import ResponsesModel
3941

4042
from .types.openai import ChatClientArgs
@@ -47,8 +49,9 @@ def ChatOpenAI(
4749
*,
4850
system_prompt: Optional[str] = None,
4951
model: "Optional[ResponsesModel | str]" = None,
50-
api_key: Optional[str] = None,
5152
base_url: str = "https://api.openai.com/v1",
53+
reasoning: "Optional[ReasoningEffort | Reasoning]" = None,
54+
api_key: Optional[str] = None,
5255
kwargs: Optional["ChatClientArgs"] = None,
5356
) -> Chat["SubmitInputArgs", Response]:
5457
"""
@@ -87,12 +90,15 @@ def ChatOpenAI(
8790
The model to use for the chat. The default, None, will pick a reasonable
8891
default, and warn you about it. We strongly recommend explicitly
8992
choosing a model for all but the most casual use.
93+
base_url
94+
The base URL to the endpoint; the default uses OpenAI.
95+
reasoning
96+
The reasoning effort to use (for reasoning-capable models like the o and
97+
gpt-5 series).
9098
api_key
9199
The API key to use for authentication. You generally should not supply
92100
this directly, but instead set the `OPENAI_API_KEY` environment
93101
variable.
94-
base_url
95-
The base URL to the endpoint; the default uses OpenAI.
96102
kwargs
97103
Additional arguments to pass to the `openai.OpenAI()` client
98104
constructor.
@@ -146,6 +152,14 @@ def ChatOpenAI(
146152
if model is None:
147153
model = log_model_default("gpt-4.1")
148154

155+
kwargs_chat: "SubmitInputArgs" = {}
156+
if reasoning is not None:
157+
if not is_reasoning_model(model):
158+
warnings.warn(f"Model {model} is not reasoning-capable", UserWarning)
159+
if isinstance(reasoning, str):
160+
reasoning = {"effort": reasoning, "summary": "auto"}
161+
kwargs_chat = {"reasoning": reasoning}
162+
149163
return Chat(
150164
provider=OpenAIProvider(
151165
api_key=api_key,
@@ -154,6 +168,7 @@ def ChatOpenAI(
154168
kwargs=kwargs,
155169
),
156170
system_prompt=system_prompt,
171+
kwargs_chat=kwargs_chat,
157172
)
158173

159174

@@ -239,7 +254,7 @@ def _chat_perform_args(
239254

240255
# Request reasoning content for reasoning models
241256
include = []
242-
if self._is_reasoning(self.model):
257+
if is_reasoning_model(self.model):
243258
include.append("reasoning.encrypted_content")
244259

245260
if "log_probs" in kwargs_full:
@@ -254,7 +269,14 @@ def _chat_perform_args(
254269

255270
def stream_text(self, chunk):
256271
if chunk.type == "response.output_text.delta":
272+
# https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
273+
return chunk.delta
274+
if chunk.type == "response.reasoning_summary_text.delta":
275+
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
257276
return chunk.delta
277+
if chunk.type == "response.reasoning_summary_text.done":
278+
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
279+
return "\n\n"
258280
return None
259281

260282
def stream_merge_chunks(self, completion, chunk):
@@ -337,11 +359,6 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> Turn:
337359
completion=completion,
338360
)
339361

340-
@staticmethod
341-
def _is_reasoning(model: str) -> bool:
342-
# https://platform.openai.com/docs/models/compare
343-
return model.startswith("o") or model.startswith("gpt-5")
344-
345362
@staticmethod
346363
def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
347364
res: "list[ResponseInputItemParam]" = []
@@ -456,3 +473,8 @@ def as_input_param(content: Content, role: Role) -> "ResponseInputItemParam":
456473

457474
def as_message(x: "ResponseInputContentParam", role: Role) -> "EasyInputMessageParam":
458475
return {"role": role, "content": [x]}
476+
477+
478+
def is_reasoning_model(model: str) -> bool:
479+
# https://platform.openai.com/docs/models/compare
480+
return model.startswith("o") or model.startswith("gpt-5")

0 commit comments

Comments
 (0)