Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions internal/runtime/executor/openai_compat_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

Expand Down Expand Up @@ -108,6 +109,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
return resp, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

url := strings.TrimSuffix(baseURL, "/") + endpoint
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
if err != nil {
Expand Down Expand Up @@ -205,6 +209,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
return nil, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

// Request usage data in the final streaming chunk so that token statistics
// are captured even when the upstream is an OpenAI-compatible provider.
translated, _ = sjson.SetBytes(translated, "stream_options.include_usage", true)
Expand Down Expand Up @@ -386,6 +393,24 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt
return payload
}

// promoteMaxTokens renames max_tokens → max_completion_tokens so that newer
// OpenAI-compatible models don't reject the legacy field. If max_completion_tokens
// is already present, max_tokens is simply removed.
func promoteMaxTokens(payload []byte) []byte {
if len(payload) == 0 {
return payload
}
mt := gjson.GetBytes(payload, "max_tokens")
if !mt.Exists() {
return payload
}
if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
payload, _ = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
}
payload, _ = sjson.DeleteBytes(payload, "max_tokens")
Comment on lines +407 to +415
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There's a potential for data loss here if the payload is not valid JSON. If sjson.SetBytes fails, it returns the original payload and the error is ignored. Subsequently, sjson.DeleteBytes is called on the original payload. If DeleteBytes succeeds, max_tokens is removed, but max_completion_tokens was never set, effectively losing the max_tokens value.

It's better to handle the error from sjson.SetBytes and return early to prevent this inconsistent state.

    if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
        var err error
        payload, err = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
        if err != nil {
            log.Warnf("promoteMaxTokens: failed to set max_completion_tokens, returning original payload: %v", err)
            return payload
        }
    }
    payload, _ = sjson.DeleteBytes(payload, "max_tokens")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve token-limit echo for Responses conversions

Deleting max_tokens here breaks a non-stream /v1/responses round-trip: ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream currently reconstructs max_output_tokens only from max_output_tokens or max_tokens in requestRawJSON (see internal/translator/openai/openai/responses/openai_openai-responses_response.go around lines 632-637). After this change, translated requests carry only max_completion_tokens, so the final Responses payload silently drops max_output_tokens, which is a behavior regression for clients relying on that echoed field.

Useful? React with 👍 / 👎.

return payload
}

type statusErr struct {
code int
msg string
Expand Down
56 changes: 56 additions & 0 deletions internal/runtime/executor/promote_max_tokens_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package executor

import (
"testing"

"github.com/tidwall/gjson"
)

// max_tokens present, no max_completion_tokens → rename.
func TestPromoteMaxTokens_Rename(t *testing.T) {
	payload := []byte(`{"model":"gpt-5","max_tokens":1024,"messages":[]}`)
	got := promoteMaxTokens(payload)

	if gjson.GetBytes(got, "max_tokens").Exists() {
		t.Error("max_tokens should be removed")
	}
	if v := gjson.GetBytes(got, "max_completion_tokens").Int(); v != 1024 {
		t.Errorf("max_completion_tokens = %d, want 1024", v)
	}
}

// max_completion_tokens already set → just drop max_tokens, keep existing value.
func TestPromoteMaxTokens_AlreadySet(t *testing.T) {
	result := promoteMaxTokens([]byte(`{"max_tokens":512,"max_completion_tokens":2048}`))

	if gjson.GetBytes(result, "max_tokens").Exists() {
		t.Error("max_tokens should be removed")
	}
	if got := gjson.GetBytes(result, "max_completion_tokens").Int(); got != 2048 {
		t.Errorf("max_completion_tokens = %d, want 2048 (original)", got)
	}
}

// no max_tokens at all → payload unchanged.
func TestPromoteMaxTokens_NoOp(t *testing.T) {
	result := promoteMaxTokens([]byte(`{"model":"gpt-5","messages":[]}`))

	// Neither field should appear when the input carried no max_tokens.
	if gjson.GetBytes(result, "max_completion_tokens").Exists() {
		t.Error("unexpected max_completion_tokens")
	}
	if gjson.GetBytes(result, "max_tokens").Exists() {
		t.Error("unexpected max_tokens")
	}
}

// empty/nil payload → no panic.
func TestPromoteMaxTokens_EmptyPayload(t *testing.T) {
	nilResult := promoteMaxTokens(nil)
	if nilResult != nil {
		t.Error("nil input should return nil")
	}

	emptyResult := promoteMaxTokens([]byte{})
	if len(emptyResult) != 0 {
		t.Error("empty input should return empty")
	}
}
Loading