30 changes: 30 additions & 0 deletions internal/runtime/executor/openai_compat_executor.go
@@ -17,6 +17,7 @@ import (
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

@@ -108,6 +109,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
return resp, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

url := strings.TrimSuffix(baseURL, "/") + endpoint
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
if err != nil {
@@ -205,6 +209,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
return nil, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

// Request usage data in the final streaming chunk so that token statistics
// are captured even when the upstream is an OpenAI-compatible provider.
translated, _ = sjson.SetBytes(translated, "stream_options.include_usage", true)
@@ -386,6 +393,29 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt
return payload
}

// promoteMaxTokens renames max_tokens → max_completion_tokens so that newer
// OpenAI-compatible models don't reject the legacy field. If max_completion_tokens
// is already present, max_tokens is simply removed.
func promoteMaxTokens(payload []byte) []byte {
if len(payload) == 0 {
return payload
}
mt := gjson.GetBytes(payload, "max_tokens")
if !mt.Exists() {
return payload
}
if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
var err error
payload, err = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
if err != nil {
log.Warnf("promoteMaxTokens: failed to set max_completion_tokens: %v", err)
return payload
}
}
payload, _ = sjson.DeleteBytes(payload, "max_tokens")
Contributor comment on lines +407 to +415 (severity: high)

There's a potential for data loss here if the payload is not valid JSON. If sjson.SetBytes fails, it returns the original payload and the error is ignored. Subsequently, sjson.DeleteBytes is called on the original payload. If DeleteBytes succeeds, max_tokens is removed, but max_completion_tokens was never set, effectively losing the max_tokens value.

It's better to handle the error from sjson.SetBytes and return early to prevent this inconsistent state.

    if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
        var err error
        payload, err = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
        if err != nil {
            log.Warnf("promoteMaxTokens: failed to set max_completion_tokens, returning original payload: %v", err)
            return payload
        }
    }
    payload, _ = sjson.DeleteBytes(payload, "max_tokens")

P2: Preserve token-limit echo for Responses conversions

Deleting max_tokens here breaks a non-stream /v1/responses round-trip: ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream currently reconstructs max_output_tokens only from max_output_tokens or max_tokens in requestRawJSON (see internal/translator/openai/openai/responses/openai_openai-responses_response.go around lines 632-637). After this change, translated requests carry only max_completion_tokens, so the final Responses payload silently drops max_output_tokens, which is a behavior regression for clients relying on that echoed field.
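A fallback chain of the following shape restores the echo (sketch; the converter diff further down in this PR adopts exactly this order):

    if v := req.Get("max_output_tokens"); v.Exists() {
        resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
    } else if v := req.Get("max_completion_tokens"); v.Exists() {
        resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
    } else if v := req.Get("max_tokens"); v.Exists() {
        resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
    }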


return payload
}

type statusErr struct {
code int
msg string
56 changes: 56 additions & 0 deletions internal/runtime/executor/promote_max_tokens_test.go
@@ -0,0 +1,56 @@
package executor

import (
"testing"

"github.com/tidwall/gjson"
)

// max_tokens present, no max_completion_tokens → rename.
func TestPromoteMaxTokens_Rename(t *testing.T) {
in := []byte(`{"model":"gpt-5","max_tokens":1024,"messages":[]}`)
out := promoteMaxTokens(in)

if gjson.GetBytes(out, "max_tokens").Exists() {
t.Error("max_tokens should be removed")
}
if gjson.GetBytes(out, "max_completion_tokens").Int() != 1024 {
t.Errorf("max_completion_tokens = %d, want 1024", gjson.GetBytes(out, "max_completion_tokens").Int())
}
}

// max_completion_tokens already set → just drop max_tokens, keep existing value.
func TestPromoteMaxTokens_AlreadySet(t *testing.T) {
in := []byte(`{"max_tokens":512,"max_completion_tokens":2048}`)
out := promoteMaxTokens(in)

if gjson.GetBytes(out, "max_tokens").Exists() {
t.Error("max_tokens should be removed")
}
if gjson.GetBytes(out, "max_completion_tokens").Int() != 2048 {
t.Errorf("max_completion_tokens = %d, want 2048 (original)", gjson.GetBytes(out, "max_completion_tokens").Int())
}
}

// no max_tokens at all → payload unchanged.
func TestPromoteMaxTokens_NoOp(t *testing.T) {
in := []byte(`{"model":"gpt-5","messages":[]}`)
out := promoteMaxTokens(in)

if gjson.GetBytes(out, "max_tokens").Exists() {
t.Error("unexpected max_tokens")
}
if gjson.GetBytes(out, "max_completion_tokens").Exists() {
t.Error("unexpected max_completion_tokens")
}
}

// empty/nil payload → no panic.
func TestPromoteMaxTokens_EmptyPayload(t *testing.T) {
if out := promoteMaxTokens(nil); out != nil {
t.Error("nil input should return nil")
}
if out := promoteMaxTokens([]byte{}); len(out) != 0 {
t.Error("empty input should return empty")
}
}
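For reference, the rename these tests exercise, as seen from a caller inside the executor package (illustrative payload; field order in the sjson output may differ):

    payload := []byte(`{"model":"gpt-5","max_tokens":1024,"messages":[]}`)
    payload = promoteMaxTokens(payload)
    // max_tokens is gone; max_completion_tokens now carries 1024:
    // {"model":"gpt-5","messages":[],"max_completion_tokens":1024}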
internal/translator/openai/openai/responses/openai_openai-responses_response.go
@@ -457,6 +457,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
}
if v := req.Get("max_output_tokens"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int())
} else if v := req.Get("max_completion_tokens"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int())
} else if v := req.Get("max_tokens"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int())
}
if v := req.Get("max_tool_calls"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_tool_calls", v.Int())
@@ -631,11 +635,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(_ context.Co
 	}
 	if v := req.Get("max_output_tokens"); v.Exists() {
 		resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
-	} else {
-		// Also support max_tokens from chat completion style
-		if v = req.Get("max_tokens"); v.Exists() {
-			resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
-		}
+	} else if v := req.Get("max_completion_tokens"); v.Exists() {
+		resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
+	} else if v := req.Get("max_tokens"); v.Exists() {
+		resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
 	}
 	if v := req.Get("max_tool_calls"); v.Exists() {
 		resp, _ = sjson.Set(resp, "max_tool_calls", v.Int())
@@ -0,0 +1,81 @@
package responses

import (
"context"
"testing"

"github.com/tidwall/gjson"
)

// minimal chat completion response for non-stream tests
const minimalChatCompletion = `{
"id":"chatcmpl-test",
"object":"chat.completion",
"created":1700000000,
"model":"gpt-5",
"choices":[{"index":0,"message":{"role":"assistant","content":"hi"},"finish_reason":"stop"}],
"usage":{"prompt_tokens":5,"completion_tokens":1,"total_tokens":6}
}`

// When the translated request carries max_output_tokens (Responses-native field),
// the response must echo it back.
func TestNonStream_MaxOutputTokens_Direct(t *testing.T) {
req := []byte(`{"model":"gpt-5","max_output_tokens":4096}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 4096 {
t.Errorf("max_output_tokens = %d, want 4096", got)
}
}

// After promoteMaxTokens rewrites max_tokens → max_completion_tokens,
// the response converter must still reconstruct max_output_tokens from the
// promoted field.
func TestNonStream_MaxCompletionTokens_Fallback(t *testing.T) {
// This is what the translated request looks like after promoteMaxTokens()
req := []byte(`{"model":"gpt-5","max_completion_tokens":2048}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 2048 {
t.Errorf("max_output_tokens = %d, want 2048 (from max_completion_tokens fallback)", got)
}
}

// Legacy max_tokens (chat completion style) must still be recognized.
func TestNonStream_MaxTokens_LegacyFallback(t *testing.T) {
req := []byte(`{"model":"gpt-5","max_tokens":1024}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 1024 {
t.Errorf("max_output_tokens = %d, want 1024 (from max_tokens legacy fallback)", got)
}
}

// max_output_tokens takes priority over max_completion_tokens and max_tokens.
func TestNonStream_MaxOutputTokens_Priority(t *testing.T) {
req := []byte(`{"model":"gpt-5","max_output_tokens":8192,"max_completion_tokens":4096,"max_tokens":2048}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 8192 {
t.Errorf("max_output_tokens = %d, want 8192 (max_output_tokens has priority)", got)
}
}

// No token limit fields → max_output_tokens should be absent.
func TestNonStream_NoTokenLimit(t *testing.T) {
req := []byte(`{"model":"gpt-5"}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
if gjson.Get(resp, "max_output_tokens").Exists() {
t.Error("max_output_tokens should be absent when no token limit is set")
}
}
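Taken together, the executor and converter changes preserve the token-limit echo end to end. A conceptual sketch of the flow (not compilable as one unit, since promoteMaxTokens is unexported in the executor package; upstreamBody stands in for the provider's chat completion response):

    req := []byte(`{"model":"gpt-5","max_tokens":1024}`)
    translated := promoteMaxTokens(req) // now carries max_completion_tokens=1024
    resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
        context.Background(), "gpt-5", translated, translated, upstreamBody, nil,
    )
    // gjson.Get(resp, "max_output_tokens").Int() == 1024 via the new fallback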