From afef600dd86232b14646abb4b0a0c99082fa7f4b Mon Sep 17 00:00:00 2001
From: efecanceliksoy
Date: Wed, 6 May 2026 14:53:52 +0300
Subject: [PATCH] fix: increase Anthropic max_tokens default and warn on
 truncation

max_tokens was hardcoded at 8192 for all Anthropic models. Newer models
(Opus, Sonnet) support much higher output. Truncated responses could
produce incomplete tool_use JSON that silently breaks tool execution.

- Default to 16384 for opus/sonnet models, 8192 for others
- Add with_max_tokens() builder for explicit configuration
- Log warning when stop_reason is max_tokens so truncation is visible
---
 src/provider.rs | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/provider.rs b/src/provider.rs
index 1e44c9b..7d5ceff 100644
--- a/src/provider.rs
+++ b/src/provider.rs
@@ -83,12 +83,18 @@ pub struct AnthropicProvider {
 
 impl AnthropicProvider {
     pub fn new(base_url: &str, api_key: &str, model: &str) -> Self {
+        // Use a higher default for newer models that support longer output
+        let max_tokens = if model.contains("opus") || model.contains("sonnet") {
+            16384
+        } else {
+            8192
+        };
         Self {
             api_key: api_key.to_string(),
             model: model.to_string(),
             base_url: base_url.to_string(),
             temperature: 0.3,
-            max_tokens: 8192,
+            max_tokens,
             client: build_reqwest_client(),
         }
     }
@@ -98,6 +104,11 @@
         self
     }
 
+    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
+        self.max_tokens = max_tokens;
+        self
+    }
+
     /// Convert OpenAI-format tool definitions to Anthropic format.
     fn convert_tools(tools: &Value) -> Value {
         // OpenAI: [{ "type": "function", "function": { "name", "description", "parameters" } }]
@@ -366,6 +377,16 @@ impl Provider for AnthropicProvider {
             }
         }
 
+        // Check if response was truncated due to max_tokens
+        let stop_reason = json_resp["stop_reason"].as_str().unwrap_or("");
+        if stop_reason == "max_tokens" {
+            log::warn!(
+                "Anthropic response truncated (stop_reason=max_tokens, max_tokens={}). \
+                 Tool calls may be incomplete.",
+                self.max_tokens
+            );
+        }
+
         let usage = json_resp.get("usage").map(|u| TokenUsage {
             prompt_tokens: u["input_tokens"].as_u64().unwrap_or(0) as u32,
             completion_tokens: u["output_tokens"].as_u64().unwrap_or(0) as u32,