-
-
Notifications
You must be signed in to change notification settings - Fork 3k
fix: fall back on model support errors during auth rotation #2222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1627,53 +1627,60 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) { | |
| } | ||
|
|
||
| statusCode := statusCodeFromResult(result.Error) | ||
| switch statusCode { | ||
| case 401: | ||
| next := now.Add(30 * time.Minute) | ||
| state.NextRetryAfter = next | ||
| suspendReason = "unauthorized" | ||
| shouldSuspendModel = true | ||
| case 402, 403: | ||
| next := now.Add(30 * time.Minute) | ||
| state.NextRetryAfter = next | ||
| suspendReason = "payment_required" | ||
| shouldSuspendModel = true | ||
| case 404: | ||
| if isModelSupportResultError(result.Error) { | ||
| next := now.Add(12 * time.Hour) | ||
| state.NextRetryAfter = next | ||
| suspendReason = "not_found" | ||
| suspendReason = "model_not_supported" | ||
| shouldSuspendModel = true | ||
| case 429: | ||
| var next time.Time | ||
| backoffLevel := state.Quota.BackoffLevel | ||
| if result.RetryAfter != nil { | ||
| next = now.Add(*result.RetryAfter) | ||
| } else { | ||
| cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth)) | ||
| if cooldown > 0 { | ||
| next = now.Add(cooldown) | ||
| } else { | ||
| switch statusCode { | ||
| case 401: | ||
| next := now.Add(30 * time.Minute) | ||
| state.NextRetryAfter = next | ||
| suspendReason = "unauthorized" | ||
| shouldSuspendModel = true | ||
| case 402, 403: | ||
| next := now.Add(30 * time.Minute) | ||
| state.NextRetryAfter = next | ||
| suspendReason = "payment_required" | ||
| shouldSuspendModel = true | ||
| case 404: | ||
| next := now.Add(12 * time.Hour) | ||
| state.NextRetryAfter = next | ||
| suspendReason = "not_found" | ||
| shouldSuspendModel = true | ||
| case 429: | ||
| var next time.Time | ||
| backoffLevel := state.Quota.BackoffLevel | ||
| if result.RetryAfter != nil { | ||
| next = now.Add(*result.RetryAfter) | ||
| } else { | ||
| cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth)) | ||
| if cooldown > 0 { | ||
| next = now.Add(cooldown) | ||
| } | ||
| backoffLevel = nextLevel | ||
| } | ||
| backoffLevel = nextLevel | ||
| } | ||
| state.NextRetryAfter = next | ||
| state.Quota = QuotaState{ | ||
| Exceeded: true, | ||
| Reason: "quota", | ||
| NextRecoverAt: next, | ||
| BackoffLevel: backoffLevel, | ||
| } | ||
| suspendReason = "quota" | ||
| shouldSuspendModel = true | ||
| setModelQuota = true | ||
| case 408, 500, 502, 503, 504: | ||
| if quotaCooldownDisabledForAuth(auth) { | ||
| state.NextRetryAfter = time.Time{} | ||
| } else { | ||
| next := now.Add(1 * time.Minute) | ||
| state.NextRetryAfter = next | ||
| state.Quota = QuotaState{ | ||
| Exceeded: true, | ||
| Reason: "quota", | ||
| NextRecoverAt: next, | ||
| BackoffLevel: backoffLevel, | ||
| } | ||
| suspendReason = "quota" | ||
| shouldSuspendModel = true | ||
| setModelQuota = true | ||
| case 408, 500, 502, 503, 504: | ||
| if quotaCooldownDisabledForAuth(auth) { | ||
| state.NextRetryAfter = time.Time{} | ||
| } else { | ||
| next := now.Add(1 * time.Minute) | ||
| state.NextRetryAfter = next | ||
| } | ||
| default: | ||
| state.NextRetryAfter = time.Time{} | ||
| } | ||
| default: | ||
| state.NextRetryAfter = time.Time{} | ||
| } | ||
|
|
||
| auth.Status = StatusError | ||
|
|
@@ -1883,14 +1890,65 @@ func statusCodeFromResult(err *Error) int { | |
| return err.StatusCode() | ||
| } | ||
|
|
||
// isModelSupportErrorMessage reports whether message reads like an upstream
// "model is not supported / not available" error.
//
// The message is trimmed and lower-cased, then checked for any of a fixed set
// of substrings, so matching is case-insensitive and tolerant of surrounding
// text. An empty or whitespace-only message never matches.
func isModelSupportErrorMessage(message string) bool {
	lower := strings.ToLower(strings.TrimSpace(message))
	if lower == "" {
		return false
	}
	// Patterns are written in lower case because they are compared against
	// the lower-cased message.
	patterns := [...]string{
		"model_not_supported",
		"requested model is not supported",
		"requested model is unsupported",
		"requested model is unavailable",
		"model is not supported",
		"model not supported",
		"unsupported model",
		"model unavailable",
		"not available for your plan",
		"not available for your account",
	}
	for _, pattern := range patterns {
		if strings.Contains(lower, pattern) {
			return true
		}
	}
	return false
}
|
|
||
| func isModelSupportError(err error) bool { | ||
| if err == nil { | ||
| return false | ||
| } | ||
| status := statusCodeFromError(err) | ||
| if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity { | ||
| return false | ||
| } | ||
| return isModelSupportErrorMessage(err.Error()) | ||
| } | ||
|
|
||
| func isModelSupportResultError(err *Error) bool { | ||
| if err == nil { | ||
| return false | ||
| } | ||
| status := statusCodeFromResult(err) | ||
| if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity { | ||
| return false | ||
| } | ||
| return isModelSupportErrorMessage(err.Message) | ||
| } | ||
|
|
||
| // isRequestInvalidError returns true if the error represents a client request | ||
| // error that should not be retried. Specifically, it treats 400 responses with | ||
| // "invalid_request_error" and all 422 responses as request-shape failures, | ||
| // where switching auths or pooled upstream models will not help. | ||
| // where switching auths or pooled upstream models will not help. Model-support | ||
| // errors are excluded so routing can fall through to another auth or upstream. | ||
| func isRequestInvalidError(err error) bool { | ||
| if err == nil { | ||
| return false | ||
| } | ||
| if isModelSupportError(err) { | ||
| return false | ||
|
Comment on lines
+2066
to
+2067
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
After this change, Useful? React with 👍 / 👎.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Addressed in ea3e0b7. |
||
| } | ||
| status := statusCodeFromError(err) | ||
| switch status { | ||
| case http.StatusBadRequest: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a requested model expands to multiple upstream candidates on the same auth, this new branch suspends `result.Model`, but `executeMixedOnce`/`executeStreamWithModelPool` always populate `Result.Model` with the alias `routeModel` rather than the failing upstream candidate. If the first candidate returns a model-support 400/422 and a later candidate succeeds, the success path resets that same alias state and calls `ResumeClientModel`, so the unsupported upstream candidate is eligible again as soon as the pool rotates back. In practice the new suspension never sticks for OpenAI-compat alias pools, which defeats the stated goal of avoiding immediate reselection of the bad path.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed in ea3e0b7. The fallback state is now keyed to the concrete upstream candidate for pooled aliases, later requests skip suspended upstream members, and a fully blocked pool auth no longer burns retry budget before a healthy fallback auth can run. Added execute/stream/count regressions for repeated pooled requests and the retry-budget edge case.