Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 100 additions & 42 deletions sdk/cliproxy/auth/conductor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1627,53 +1627,60 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
}

statusCode := statusCodeFromResult(result.Error)
switch statusCode {
case 401:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "unauthorized"
shouldSuspendModel = true
case 402, 403:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "payment_required"
shouldSuspendModel = true
case 404:
if isModelSupportResultError(result.Error) {
next := now.Add(12 * time.Hour)
state.NextRetryAfter = next
suspendReason = "not_found"
suspendReason = "model_not_supported"
shouldSuspendModel = true
Comment on lines +1747 to 1751

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve model-support suspensions across same-auth pool fallback

When a requested model expands to multiple upstream candidates on the same auth, this new branch suspends result.Model, but executeMixedOnce/executeStreamWithModelPool always populate Result.Model with the alias routeModel rather than the failing upstream candidate. If the first candidate returns a model-support 400/422 and a later candidate succeeds, the success path resets that same alias state and calls ResumeClientModel, so the unsupported upstream candidate is eligible again as soon as the pool rotates back. In practice the new suspension never sticks for OpenAI-compat alias pools, which defeats the stated goal of avoiding immediate reselection of the bad path.

Useful? React with 👍 / 👎.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in ea3e0b7. The fallback state is now keyed to the concrete upstream candidate for pooled aliases, later requests skip suspended upstream members, and a fully blocked pool auth no longer burns retry budget before a healthy fallback auth can run. Added execute/stream/count regressions for repeated pooled requests and the retry-budget edge case.

case 429:
var next time.Time
backoffLevel := state.Quota.BackoffLevel
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
} else {
switch statusCode {
case 401:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "unauthorized"
shouldSuspendModel = true
case 402, 403:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "payment_required"
shouldSuspendModel = true
case 404:
next := now.Add(12 * time.Hour)
state.NextRetryAfter = next
suspendReason = "not_found"
shouldSuspendModel = true
case 429:
var next time.Time
backoffLevel := state.Quota.BackoffLevel
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
}
backoffLevel = nextLevel
}
backoffLevel = nextLevel
}
state.NextRetryAfter = next
state.Quota = QuotaState{
Exceeded: true,
Reason: "quota",
NextRecoverAt: next,
BackoffLevel: backoffLevel,
}
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
case 408, 500, 502, 503, 504:
if quotaCooldownDisabledForAuth(auth) {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
state.NextRetryAfter = next
state.Quota = QuotaState{
Exceeded: true,
Reason: "quota",
NextRecoverAt: next,
BackoffLevel: backoffLevel,
}
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
case 408, 500, 502, 503, 504:
if quotaCooldownDisabledForAuth(auth) {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
state.NextRetryAfter = next
}
default:
state.NextRetryAfter = time.Time{}
}
default:
state.NextRetryAfter = time.Time{}
}

auth.Status = StatusError
Expand Down Expand Up @@ -1883,14 +1890,65 @@ func statusCodeFromResult(err *Error) int {
return err.StatusCode()
}

func isModelSupportErrorMessage(message string) bool {
lower := strings.ToLower(strings.TrimSpace(message))
if lower == "" {
return false
}
patterns := [...]string{
"model_not_supported",
"requested model is not supported",
"requested model is unsupported",
"requested model is unavailable",
"model is not supported",
"model not supported",
"unsupported model",
"model unavailable",
"not available for your plan",
"not available for your account",
}
for _, pattern := range patterns {
if strings.Contains(lower, pattern) {
return true
}
}
return false
}

func isModelSupportError(err error) bool {
if err == nil {
return false
}
status := statusCodeFromError(err)
if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity {
return false
}
return isModelSupportErrorMessage(err.Error())
}

func isModelSupportResultError(err *Error) bool {
if err == nil {
return false
}
status := statusCodeFromResult(err)
if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity {
return false
}
return isModelSupportErrorMessage(err.Message)
}

// isRequestInvalidError returns true if the error represents a client request
// error that should not be retried. Specifically, it treats 400 responses with
// "invalid_request_error" and all 422 responses as request-shape failures,
// where switching auths or pooled upstream models will not help.
// where switching auths or pooled upstream models will not help. Model-support
// errors are excluded so routing can fall through to another auth or upstream.
func isRequestInvalidError(err error) bool {
if err == nil {
return false
}
if isModelSupportError(err) {
return false
Comment on lines +2066 to +2067

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Record model-support fallback failures in ExecuteCount

After this change, ExecuteCount will also treat model-support 400/422s as fallback-eligible, but executeCountMixedOnce still only calls m.hook.OnResult instead of MarkResult (around sdk/cliproxy/auth/conductor.go:1087-1107). In setups that run token counting before execution, the current call can fall through to another auth/model, yet the unsupported auth/model is never marked unavailable, so every later count request retries the same failing credential again. The new behavior is therefore sticky for Execute/ExecuteStream, but not for ExecuteCount.

Useful? React with 👍 / 👎.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in ea3e0b7. ExecuteCount now records through MarkResult, so model-support fallback updates auth/model availability the same way as execute/stream. Added count-path regressions to cover repeated pooled requests after a model-support failure.

}
status := statusCodeFromError(err)
switch status {
case http.StatusBadRequest:
Expand Down
117 changes: 117 additions & 0 deletions sdk/cliproxy/auth/conductor_overrides_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,53 @@ func (e *credentialRetryLimitExecutor) Calls() int {
return e.calls
}

type authFallbackExecutor struct {
id string

mu sync.Mutex
executeCalls []string
executeErrors map[string]error
}

func (e *authFallbackExecutor) Identifier() string {
return e.id
}

func (e *authFallbackExecutor) Execute(_ context.Context, auth *Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
e.mu.Lock()
e.executeCalls = append(e.executeCalls, auth.ID)
err := e.executeErrors[auth.ID]
e.mu.Unlock()
if err != nil {
return cliproxyexecutor.Response{}, err
}
return cliproxyexecutor.Response{Payload: []byte(auth.ID)}, nil
}

func (e *authFallbackExecutor) ExecuteStream(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) {
return nil, &Error{HTTPStatus: 500, Message: "not implemented"}
}

func (e *authFallbackExecutor) Refresh(_ context.Context, auth *Auth) (*Auth, error) {
return auth, nil
}

func (e *authFallbackExecutor) CountTokens(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
return cliproxyexecutor.Response{}, &Error{HTTPStatus: 500, Message: "not implemented"}
}

func (e *authFallbackExecutor) HttpRequest(context.Context, *Auth, *http.Request) (*http.Response, error) {
return nil, nil
}

func (e *authFallbackExecutor) ExecuteCalls() []string {
e.mu.Lock()
defer e.mu.Unlock()
out := make([]string, len(e.executeCalls))
copy(out, e.executeCalls)
return out
}

func newCredentialRetryLimitTestManager(t *testing.T, maxRetryCredentials int) (*Manager, *credentialRetryLimitExecutor) {
t.Helper()

Expand Down Expand Up @@ -191,6 +238,76 @@ func TestManager_MaxRetryCredentials_LimitsCrossCredentialRetries(t *testing.T)
}
}

func TestManager_ModelSupportBadRequest_FallsBackAndSuspendsAuth(t *testing.T) {
m := NewManager(nil, nil, nil)
executor := &authFallbackExecutor{
id: "claude",
executeErrors: map[string]error{
"aa-bad-auth": &Error{
HTTPStatus: http.StatusBadRequest,
Message: "invalid_request_error: The requested model is not supported.",
},
},
}
m.RegisterExecutor(executor)

model := "claude-opus-4-6"
badAuth := &Auth{ID: "aa-bad-auth", Provider: "claude"}
goodAuth := &Auth{ID: "bb-good-auth", Provider: "claude"}

reg := registry.GetGlobalRegistry()
reg.RegisterClient(badAuth.ID, "claude", []*registry.ModelInfo{{ID: model}})
reg.RegisterClient(goodAuth.ID, "claude", []*registry.ModelInfo{{ID: model}})
t.Cleanup(func() {
reg.UnregisterClient(badAuth.ID)
reg.UnregisterClient(goodAuth.ID)
})

if _, errRegister := m.Register(context.Background(), badAuth); errRegister != nil {
t.Fatalf("register bad auth: %v", errRegister)
}
if _, errRegister := m.Register(context.Background(), goodAuth); errRegister != nil {
t.Fatalf("register good auth: %v", errRegister)
}

request := cliproxyexecutor.Request{Model: model}
for i := 0; i < 2; i++ {
resp, errExecute := m.Execute(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{})
if errExecute != nil {
t.Fatalf("execute %d error = %v, want success", i, errExecute)
}
if string(resp.Payload) != goodAuth.ID {
t.Fatalf("execute %d payload = %q, want %q", i, string(resp.Payload), goodAuth.ID)
}
}

got := executor.ExecuteCalls()
want := []string{badAuth.ID, goodAuth.ID, goodAuth.ID}
if len(got) != len(want) {
t.Fatalf("execute calls = %v, want %v", got, want)
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("execute call %d auth = %q, want %q", i, got[i], want[i])
}
}

updatedBad, ok := m.GetByID(badAuth.ID)
if !ok || updatedBad == nil {
t.Fatalf("expected bad auth to remain registered")
}
state := updatedBad.ModelStates[model]
if state == nil {
t.Fatalf("expected model state for %q", model)
}
if !state.Unavailable {
t.Fatalf("expected bad auth model state to be unavailable")
}
if state.NextRetryAfter.IsZero() {
t.Fatalf("expected bad auth model state cooldown to be set")
}
}

func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
prev := quotaCooldownDisabled.Load()
quotaCooldownDisabled.Store(false)
Expand Down
69 changes: 69 additions & 0 deletions sdk/cliproxy/auth/openai_compat_pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,75 @@ func TestManagerExecute_OpenAICompatAliasPoolStopsOnBadRequest(t *testing.T) {
t.Fatalf("execute calls = %v, want only first invalid model", got)
}
}

func TestManagerExecute_OpenAICompatAliasPoolFallsBackOnModelSupportBadRequest(t *testing.T) {
alias := "claude-opus-4.66"
modelSupportErr := &Error{
HTTPStatus: http.StatusBadRequest,
Message: "invalid_request_error: The requested model is not supported.",
}
executor := &openAICompatPoolExecutor{
id: "pool",
executeErrors: map[string]error{"qwen3.5-plus": modelSupportErr},
}
m := newOpenAICompatPoolTestManager(t, alias, []internalconfig.OpenAICompatibilityModel{
{Name: "qwen3.5-plus", Alias: alias},
{Name: "glm-5", Alias: alias},
}, executor)

resp, err := m.Execute(context.Background(), []string{"pool"}, cliproxyexecutor.Request{Model: alias}, cliproxyexecutor.Options{})
if err != nil {
t.Fatalf("execute error = %v, want fallback success", err)
}
if string(resp.Payload) != "glm-5" {
t.Fatalf("payload = %q, want %q", string(resp.Payload), "glm-5")
}
got := executor.ExecuteModels()
want := []string{"qwen3.5-plus", "glm-5"}
if len(got) != len(want) {
t.Fatalf("execute calls = %v, want %v", got, want)
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("execute call %d model = %q, want %q", i, got[i], want[i])
}
}
}

func TestManagerExecute_OpenAICompatAliasPoolFallsBackOnModelSupportUnprocessableEntity(t *testing.T) {
alias := "claude-opus-4.66"
modelSupportErr := &Error{
HTTPStatus: http.StatusUnprocessableEntity,
Message: "The requested model is not supported.",
}
executor := &openAICompatPoolExecutor{
id: "pool",
executeErrors: map[string]error{"qwen3.5-plus": modelSupportErr},
}
m := newOpenAICompatPoolTestManager(t, alias, []internalconfig.OpenAICompatibilityModel{
{Name: "qwen3.5-plus", Alias: alias},
{Name: "glm-5", Alias: alias},
}, executor)

resp, err := m.Execute(context.Background(), []string{"pool"}, cliproxyexecutor.Request{Model: alias}, cliproxyexecutor.Options{})
if err != nil {
t.Fatalf("execute error = %v, want fallback success", err)
}
if string(resp.Payload) != "glm-5" {
t.Fatalf("payload = %q, want %q", string(resp.Payload), "glm-5")
}
got := executor.ExecuteModels()
want := []string{"qwen3.5-plus", "glm-5"}
if len(got) != len(want) {
t.Fatalf("execute calls = %v, want %v", got, want)
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("execute call %d model = %q, want %q", i, got[i], want[i])
}
}
}

func TestManagerExecute_OpenAICompatAliasPoolFallsBackWithinSameAuth(t *testing.T) {
alias := "claude-opus-4.66"
executor := &openAICompatPoolExecutor{
Expand Down
Loading