30 changes: 30 additions & 0 deletions internal/runtime/executor/openai_compat_executor.go
@@ -17,6 +17,7 @@ import (
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

@@ -108,6 +109,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
return resp, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

url := strings.TrimSuffix(baseURL, "/") + endpoint
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
if err != nil {
@@ -205,6 +209,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
return nil, err
}

// newer OpenAI models require max_completion_tokens (#2101)
translated = promoteMaxTokens(translated)

// Request usage data in the final streaming chunk so that token statistics
// are captured even when the upstream is an OpenAI-compatible provider.
translated, _ = sjson.SetBytes(translated, "stream_options.include_usage", true)
@@ -386,6 +393,29 @@ func (e *OpenAICompatExecutor) overrideModel(payload []byte, model string) []byt
return payload
}

// promoteMaxTokens renames max_tokens → max_completion_tokens so that newer
// OpenAI-compatible models don't reject the legacy field. If max_completion_tokens
// is already present, max_tokens is simply removed.
func promoteMaxTokens(payload []byte) []byte {
if len(payload) == 0 {
return payload
}
mt := gjson.GetBytes(payload, "max_tokens")
if !mt.Exists() {
return payload
}
if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
var err error
payload, err = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
if err != nil {
log.Warnf("promoteMaxTokens: failed to set max_completion_tokens: %v", err)
return payload
}
}
payload, _ = sjson.DeleteBytes(payload, "max_tokens")
Contributor comment on lines +407 to +415 (severity: high)

There's a potential for data loss here if the payload is not valid JSON. If sjson.SetBytes fails, it returns the original payload and the error is ignored. Subsequently, sjson.DeleteBytes is called on the original payload. If DeleteBytes succeeds, max_tokens is removed, but max_completion_tokens was never set, effectively losing the max_tokens value.

It's better to handle the error from sjson.SetBytes and return early to prevent this inconsistent state.

    if !gjson.GetBytes(payload, "max_completion_tokens").Exists() {
        var err error
        payload, err = sjson.SetBytes(payload, "max_completion_tokens", mt.Value())
        if err != nil {
            log.Warnf("promoteMaxTokens: failed to set max_completion_tokens, returning original payload: %v", err)
            return payload
        }
    }
    payload, _ = sjson.DeleteBytes(payload, "max_tokens")

P2: Preserve token-limit echo for Responses conversions

Deleting max_tokens here breaks a non-stream /v1/responses round-trip: ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream currently reconstructs max_output_tokens only from max_output_tokens or max_tokens in requestRawJSON (see internal/translator/openai/openai/responses/openai_openai-responses_response.go around lines 632-637). After this change, translated requests carry only max_completion_tokens, so the final Responses payload silently drops max_output_tokens, which is a behavior regression for clients relying on that echoed field.
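A fallback chain of the following shape restores the echo (sketch; the converter diff further down in this PR adopts exactly this order):

    if v := req.Get("max_output_tokens"); v.Exists() {
        resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
    } else if v := req.Get("max_completion_tokens"); v.Exists() {
        resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
    } else if v := req.Get("max_tokens"); v.Exists() {
        resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
    }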


return payload
}

type statusErr struct {
code int
msg string
56 changes: 56 additions & 0 deletions internal/runtime/executor/promote_max_tokens_test.go
@@ -0,0 +1,56 @@
package executor

import (
"testing"

"github.com/tidwall/gjson"
)

// max_tokens present, no max_completion_tokens → rename.
func TestPromoteMaxTokens_Rename(t *testing.T) {
in := []byte(`{"model":"gpt-5","max_tokens":1024,"messages":[]}`)
out := promoteMaxTokens(in)

if gjson.GetBytes(out, "max_tokens").Exists() {
t.Error("max_tokens should be removed")
}
if gjson.GetBytes(out, "max_completion_tokens").Int() != 1024 {
t.Errorf("max_completion_tokens = %d, want 1024", gjson.GetBytes(out, "max_completion_tokens").Int())
}
}

// max_completion_tokens already set → just drop max_tokens, keep existing value.
func TestPromoteMaxTokens_AlreadySet(t *testing.T) {
in := []byte(`{"max_tokens":512,"max_completion_tokens":2048}`)
out := promoteMaxTokens(in)

if gjson.GetBytes(out, "max_tokens").Exists() {
t.Error("max_tokens should be removed")
}
if gjson.GetBytes(out, "max_completion_tokens").Int() != 2048 {
t.Errorf("max_completion_tokens = %d, want 2048 (original)", gjson.GetBytes(out, "max_completion_tokens").Int())
}
}

// no max_tokens at all → payload unchanged.
func TestPromoteMaxTokens_NoOp(t *testing.T) {
in := []byte(`{"model":"gpt-5","messages":[]}`)
out := promoteMaxTokens(in)

if gjson.GetBytes(out, "max_tokens").Exists() {
t.Error("unexpected max_tokens")
}
if gjson.GetBytes(out, "max_completion_tokens").Exists() {
t.Error("unexpected max_completion_tokens")
}
}

// empty/nil payload → no panic.
func TestPromoteMaxTokens_EmptyPayload(t *testing.T) {
if out := promoteMaxTokens(nil); out != nil {
t.Error("nil input should return nil")
}
if out := promoteMaxTokens([]byte{}); len(out) != 0 {
t.Error("empty input should return empty")
}
}
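For reference, the rename these tests exercise, as seen from a caller inside the executor package (illustrative payload; field order in the sjson output may differ):

    payload := []byte(`{"model":"gpt-5","max_tokens":1024,"messages":[]}`)
    payload = promoteMaxTokens(payload)
    // max_tokens is gone; max_completion_tokens now carries 1024:
    // {"model":"gpt-5","messages":[],"max_completion_tokens":1024}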
internal/translator/openai/openai/responses/openai_openai-responses_response.go
@@ -457,6 +457,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
}
if v := req.Get("max_output_tokens"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int())
} else if v := req.Get("max_completion_tokens"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int())
} else if v := req.Get("max_tokens"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_output_tokens", v.Int())
}
if v := req.Get("max_tool_calls"); v.Exists() {
completed, _ = sjson.Set(completed, "response.max_tool_calls", v.Int())
@@ -631,11 +635,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(_ context.Co
 	}
 	if v := req.Get("max_output_tokens"); v.Exists() {
 		resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
-	} else {
-		// Also support max_tokens from chat completion style
-		if v = req.Get("max_tokens"); v.Exists() {
-			resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
-		}
+	} else if v := req.Get("max_completion_tokens"); v.Exists() {
+		resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
+	} else if v := req.Get("max_tokens"); v.Exists() {
+		resp, _ = sjson.Set(resp, "max_output_tokens", v.Int())
 	}
 	if v := req.Get("max_tool_calls"); v.Exists() {
 		resp, _ = sjson.Set(resp, "max_tool_calls", v.Int())
@@ -0,0 +1,81 @@
package responses

import (
"context"
"testing"

"github.com/tidwall/gjson"
)

// minimal chat completion response for non-stream tests
const minimalChatCompletion = `{
"id":"chatcmpl-test",
"object":"chat.completion",
"created":1700000000,
"model":"gpt-5",
"choices":[{"index":0,"message":{"role":"assistant","content":"hi"},"finish_reason":"stop"}],
"usage":{"prompt_tokens":5,"completion_tokens":1,"total_tokens":6}
}`

// When the translated request carries max_output_tokens (Responses-native field),
// the response must echo it back.
func TestNonStream_MaxOutputTokens_Direct(t *testing.T) {
req := []byte(`{"model":"gpt-5","max_output_tokens":4096}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 4096 {
t.Errorf("max_output_tokens = %d, want 4096", got)
}
}

// After promoteMaxTokens rewrites max_tokens → max_completion_tokens,
// the response converter must still reconstruct max_output_tokens from the
// promoted field.
func TestNonStream_MaxCompletionTokens_Fallback(t *testing.T) {
// This is what the translated request looks like after promoteMaxTokens()
req := []byte(`{"model":"gpt-5","max_completion_tokens":2048}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 2048 {
t.Errorf("max_output_tokens = %d, want 2048 (from max_completion_tokens fallback)", got)
}
}

// Legacy max_tokens (chat completion style) must still be recognized.
func TestNonStream_MaxTokens_LegacyFallback(t *testing.T) {
req := []byte(`{"model":"gpt-5","max_tokens":1024}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 1024 {
t.Errorf("max_output_tokens = %d, want 1024 (from max_tokens legacy fallback)", got)
}
}

// max_output_tokens takes priority over max_completion_tokens and max_tokens.
func TestNonStream_MaxOutputTokens_Priority(t *testing.T) {
req := []byte(`{"model":"gpt-5","max_output_tokens":8192,"max_completion_tokens":4096,"max_tokens":2048}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
got := gjson.Get(resp, "max_output_tokens").Int()
if got != 8192 {
t.Errorf("max_output_tokens = %d, want 8192 (max_output_tokens has priority)", got)
}
}

// No token limit fields → max_output_tokens should be absent.
func TestNonStream_NoTokenLimit(t *testing.T) {
req := []byte(`{"model":"gpt-5"}`)
resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
context.Background(), "gpt-5", req, req, []byte(minimalChatCompletion), nil,
)
if gjson.Get(resp, "max_output_tokens").Exists() {
t.Error("max_output_tokens should be absent when no token limit is set")
}
}
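Taken together, the executor and converter changes preserve the token-limit echo end to end. A conceptual sketch of the flow (not compilable as one unit, since promoteMaxTokens is unexported in the executor package; upstreamBody stands in for the provider's chat completion response):

    req := []byte(`{"model":"gpt-5","max_tokens":1024}`)
    translated := promoteMaxTokens(req) // now carries max_completion_tokens=1024
    resp := ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(
        context.Background(), "gpt-5", translated, translated, upstreamBody, nil,
    )
    // gjson.Get(resp, "max_output_tokens").Int() == 1024 via the new fallback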