Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions internal/runtime/executor/openai_compat_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

Expand Down Expand Up @@ -108,6 +109,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
return resp, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

url := strings.TrimSuffix(baseURL, "/") + endpoint
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
if err != nil {
Expand Down Expand Up @@ -205,6 +209,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
return nil, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

// Request usage data in the final streaming chunk so that token statistics
// are captured even when the upstream is an OpenAI-compatible provider.
translated, _ = sjson.SetBytes(translated, "stream_options.include_usage", true)
Expand Down Expand Up @@ -386,6 +393,24 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt
return payload
}

// promoteMaxTokens renames max_tokens → max_completion_tokens so that newer
// OpenAI-compatible models don't reject the legacy field. If max_completion_tokens
// is already present, max_tokens is simply removed.
func promoteMaxTokens(payload []byte) []byte {
if len(payload) == 0 {
return payload
}
mt := gjson.GetBytes(payload, "max_tokens")
if !mt.Exists() {
return payload
}
if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
payload, _ = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
}
payload, _ = sjson.DeleteBytes(payload, "max_tokens")
Comment on lines +407 to +415
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There's a potential for data loss here if the payload is not valid JSON. If sjson.SetBytes fails, it returns the original payload and the error is ignored. Subsequently, sjson.DeleteBytes is called on the original payload. If DeleteBytes succeeds, max_tokens is removed, but max_completion_tokens was never set, effectively losing the max_tokens value.

It's better to handle the error from sjson.SetBytes and return early to prevent this inconsistent state.

    if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
        var err error
        payload, err = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
        if err != nil {
            log.Warnf("promoteMaxTokens: failed to set max_completion_tokens, returning original payload: %v", err)
            return payload
        }
    }
    payload, _ = sjson.DeleteBytes(payload, "max_tokens")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve token-limit echo for Responses conversions

Deleting max_tokens here breaks a non-stream /v1/responses round-trip: ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream currently reconstructs max_output_tokens only from max_output_tokens or max_tokens in requestRawJSON (see internal/translator/openai/openai/responses/openai_openai-responses_response.go around lines 632-637). After this change, translated requests carry only max_completion_tokens, so the final Responses payload silently drops max_output_tokens, which is a behavior regression for clients relying on that echoed field.

Useful? React with 👍 / 👎.

return payload
}

type statusErr struct {
code int
msg string
Expand Down
56 changes: 56 additions & 0 deletions internal/runtime/executor/promote_max_tokens_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package executor

import (
"testing"

"github.com/tidwall/gjson"
)

// max_tokens present, no max_completion_tokens → rename.
func TestPromoteMaxTokens_Rename(t *testing.T) {
	payload := []byte(`{"model":"gpt-5","max_tokens":1024,"messages":[]}`)
	got := promoteMaxTokens(payload)

	if gjson.GetBytes(got, "max_tokens").Exists() {
		t.Error("max_tokens should be removed")
	}
	if v := gjson.GetBytes(got, "max_completion_tokens").Int(); v != 1024 {
		t.Errorf("max_completion_tokens = %d, want 1024", v)
	}
}

// max_completion_tokens already set → just drop max_tokens, keep existing value.
func TestPromoteMaxTokens_AlreadySet(t *testing.T) {
	result := promoteMaxTokens([]byte(`{"max_tokens":512,"max_completion_tokens":2048}`))

	if gjson.GetBytes(result, "max_tokens").Exists() {
		t.Error("max_tokens should be removed")
	}
	if got := gjson.GetBytes(result, "max_completion_tokens").Int(); got != 2048 {
		t.Errorf("max_completion_tokens = %d, want 2048 (original)", got)
	}
}

// no max_tokens at all → payload unchanged.
func TestPromoteMaxTokens_NoOp(t *testing.T) {
	result := promoteMaxTokens([]byte(`{"model":"gpt-5","messages":[]}`))

	// Neither field should appear when the input carried no max_tokens.
	if gjson.GetBytes(result, "max_completion_tokens").Exists() {
		t.Error("unexpected max_completion_tokens")
	}
	if gjson.GetBytes(result, "max_tokens").Exists() {
		t.Error("unexpected max_tokens")
	}
}

// empty/nil payload → no panic.
func TestPromoteMaxTokens_EmptyPayload(t *testing.T) {
	nilResult := promoteMaxTokens(nil)
	if nilResult != nil {
		t.Error("nil input should return nil")
	}

	emptyResult := promoteMaxTokens([]byte{})
	if len(emptyResult) != 0 {
		t.Error("empty input should return empty")
	}
}
Loading