diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 623c66206a..a601d224f2 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -17,6 +17,7 @@ import ( cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -108,6 +109,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A return resp, err } + // newer OpenAI models require max_completion_tokens (#2101) + translated = promoteMaxTokens(translated) + url := strings.TrimSuffix(baseURL, "/") + endpoint httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) if err != nil { @@ -205,6 +209,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy return nil, err } + // newer OpenAI models require max_completion_tokens (#2101) + translated = promoteMaxTokens(translated) + // Request usage data in the final streaming chunk so that token statistics // are captured even when the upstream is an OpenAI-compatible provider. translated, _ = sjson.SetBytes(translated, "stream_options.include_usage", true) @@ -386,6 +393,29 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt return payload } +// promoteMaxTokens renames max_tokens → max_completion_tokens so that newer +// OpenAI-compatible models don't reject the legacy field. If max_completion_tokens +// is already present, max_tokens is simply removed. +func promoteMaxTokens(payload []byte) []byte { + if len(payload) == 0 { + return payload + } + mt := gjson.GetBytes(payload, "max_tokens") + if !mt.Exists() { + return payload + } + if !gjson.GetBytes(payload, "max_completion_tokens").Exists() { + var err error + payload, err = sjson.SetBytes(payload, "max_completion_tokens", mt.Value()) + if err != nil { + log.Warnf("promoteMaxTokens: failed to set max_completion_tokens: %v", err) + return payload + } + } + payload, _ = sjson.DeleteBytes(payload, "max_tokens") + return payload +} + type statusErr struct { code int msg string diff --git a/internal/runtime/executor/promote_max_tokens_test.go b/internal/runtime/executor/promote_max_tokens_test.go new file mode 100644 index 0000000000..65b9499eca --- /dev/null +++ b/internal/runtime/executor/promote_max_tokens_test.go @@ -0,0 +1,56 @@ +package executor + +import ( + "testing" + + "github.com/tidwall/gjson" +) + +// max_tokens present, no max_completion_tokens → rename. +func TestPromoteMaxTokens_Rename(t *testing.T) { + in := []byte(`{"model":"gpt-5","max_tokens":1024,"messages":[]}`) + out := promoteMaxTokens(in) + + if gjson.GetBytes(out, "max_tokens").Exists() { + t.Error("max_tokens should be removed") + } + if gjson.GetBytes(out, "max_completion_tokens").Int() != 1024 { + t.Errorf("max_completion_tokens = %d, want 1024", gjson.GetBytes(out, "max_completion_tokens").Int()) + } +} + +// max_completion_tokens already set → just drop max_tokens, keep existing value. +func TestPromoteMaxTokens_AlreadySet(t *testing.T) { + in := []byte(`{"max_tokens":512,"max_completion_tokens":2048}`) + out := promoteMaxTokens(in) + + if gjson.GetBytes(out, "max_tokens").Exists() { + t.Error("max_tokens should be removed") + } + if gjson.GetBytes(out, "max_completion_tokens").Int() != 2048 { + t.Errorf("max_completion_tokens = %d, want 2048 (original)", gjson.GetBytes(out, "max_completion_tokens").Int()) + } +} + +// no max_tokens at all → payload unchanged. +func TestPromoteMaxTokens_NoOp(t *testing.T) { + in := []byte(`{"model":"gpt-5","messages":[]}`) + out := promoteMaxTokens(in) + + if gjson.GetBytes(out, "max_tokens").Exists() { + t.Error("unexpected max_tokens") + } + if gjson.GetBytes(out, "max_completion_tokens").Exists() { + t.Error("unexpected max_completion_tokens") + } +} + +// empty/nil payload → no panic. +func TestPromoteMaxTokens_EmptyPayload(t *testing.T) { + if out := promoteMaxTokens(nil); out != nil { + t.Error("nil input should return nil") + } + if out := promoteMaxTokens([]byte{}); len(out) != 0 { + t.Error("empty input should return empty") + } +} diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_response.go b/internal/translator/openai/openai/responses/openai_openai-responses_response.go index 151528526c..0020bb3618 100644 --- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go +++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go @@ -457,6 +457,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, } if v := req.Get("max_output_tokens"); v.Exists() { completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int()) + } else if v := req.Get("max_completion_tokens"); v.Exists() { + completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int()) + } else if v := req.Get("max_tokens"); v.Exists() { + completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int()) } if v := req.Get("max_tool_calls"); v.Exists() { completed, _ = sjson.Set(completed, "response.max_tool_calls", v.Int()) @@ -631,11 +635,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(_ context.Co } if v := req.Get("max_output_tokens"); v.Exists() { resp, _ = sjson.Set(resp, "max_output_tokens", v.Int()) - } else { - // Also support max_tokens from chat completion style - if v = req.Get("max_tokens"); v.Exists() { - resp, _ = sjson.Set(resp, "max_output_tokens", v.Int()) - } + } else if v := req.Get("max_completion_tokens"); v.Exists() { + resp, _ = sjson.Set(resp, "max_output_tokens", v.Int()) + } else if v := req.Get("max_tokens"); v.Exists() { + resp, _ = sjson.Set(resp, "max_output_tokens", v.Int()) } if v := req.Get("max_tool_calls"); v.Exists() { resp, _ = sjson.Set(resp, "max_tool_calls", v.Int()) diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go b/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go new file mode 100644 index 0000000000..6208d4085f --- /dev/null +++ b/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go @@ -0,0 +1,81 @@ +package responses + +import ( + "context" + "testing" + + "github.com/tidwall/gjson" +) + +// minimal chat completion response for non-stream tests +const minimalChatCompletion = `{ + "id":"chatcmpl-test", + "object":"chat.completion", + "created":1700000000, + "model":"gpt-5", + "choices":[{"index":0,"message":{"role":"assistant","content":"hi"},"finish_reason":"stop"}], + "usage":{"prompt_tokens":5,"completion_tokens":1,"total_tokens":6} +}` + +// When the translated request carries max_output_tokens (Responses-native field), +// the response must echo it back. +func TestNonStream_MaxOutputTokens_Direct(t *testing.T) { + req := []byte(`{"model":"gpt-5","max_output_tokens":4096}`) + resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream( + context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil, + ) + got := gjson.Get(resp, "max_output_tokens").Int() + if got != 4096 { + t.Errorf("max_output_tokens = %d, want 4096", got) + } +} + +// After promoteMaxTokens rewrites max_tokens → max_completion_tokens, +// the response converter must still reconstruct max_output_tokens from the +// promoted field. +func TestNonStream_MaxCompletionTokens_Fallback(t *testing.T) { + // This is what the translated request looks like after promoteMaxTokens() + req := []byte(`{"model":"gpt-5","max_completion_tokens":2048}`) + resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream( + context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil, + ) + got := gjson.Get(resp, "max_output_tokens").Int() + if got != 2048 { + t.Errorf("max_output_tokens = %d, want 2048 (from max_completion_tokens fallback)", got) + } +} + +// Legacy max_tokens (chat completion style) must still be recognized. +func TestNonStream_MaxTokens_LegacyFallback(t *testing.T) { + req := []byte(`{"model":"gpt-5","max_tokens":1024}`) + resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream( + context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil, + ) + got := gjson.Get(resp, "max_output_tokens").Int() + if got != 1024 { + t.Errorf("max_output_tokens = %d, want 1024 (from max_tokens legacy fallback)", got) + } +} + +// max_output_tokens takes priority over max_completion_tokens and max_tokens. +func TestNonStream_MaxOutputTokens_Priority(t *testing.T) { + req := []byte(`{"model":"gpt-5","max_output_tokens":8192,"max_completion_tokens":4096,"max_tokens":2048}`) + resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream( + context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil, + ) + got := gjson.Get(resp, "max_output_tokens").Int() + if got != 8192 { + t.Errorf("max_output_tokens = %d, want 8192 (max_output_tokens has priority)", got) + } +} + +// No token limit fields → max_output_tokens should be absent. +func TestNonStream_NoTokenLimit(t *testing.T) { + req := []byte(`{"model":"gpt-5"}`) + resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream( + context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil, + ) + if gjson.Get(resp, "max_output_tokens").Exists() { + t.Error("max_output_tokens should be absent when no token limit is set") + } +}