diff --git a/src/hooks/runtime-fallback/constants.ts b/src/hooks/runtime-fallback/constants.ts index 19a7cad563..600940b094 100644 --- a/src/hooks/runtime-fallback/constants.ts +++ b/src/hooks/runtime-fallback/constants.ts @@ -35,6 +35,9 @@ export const RETRYABLE_ERROR_PATTERNS = [ /service.?unavailable/i, /overloaded/i, /temporarily.?unavailable/i, + /hit.{0,10}(?:your|the)?.{0,5}limit/i, + /spending.?cap/i, + /usage.?limit/i, /try.?again/i, /(?:^|\s)429(?:\s|$)/, /(?:^|\s)503(?:\s|$)/, diff --git a/src/shared/model-error-classifier.test.ts b/src/shared/model-error-classifier.test.ts index a1f7c52654..92bf01d148 100644 --- a/src/shared/model-error-classifier.test.ts +++ b/src/shared/model-error-classifier.test.ts @@ -3,7 +3,7 @@ const { describe, expect, test, beforeEach, afterEach, mock, spyOn } = require(" import * as connectedProvidersCache from "./connected-providers-cache" let readConnectedProvidersCacheSpy: ReturnType | undefined -const { shouldRetryError, selectFallbackProvider } = await import("./model-error-classifier") +const { shouldRetryError, isRetryableModelError, isStopModelError, selectFallbackProvider } = await import("./model-error-classifier") describe("model-error-classifier", () => { beforeEach(() => { @@ -16,226 +16,410 @@ describe("model-error-classifier", () => { readConnectedProvidersCacheSpy = undefined }) - test("treats overloaded retry messages as retryable", () => { - //#given - const error = { message: "Provider is overloaded" } + describe("#shouldRetryError", () => { + test("treats overloaded retry messages as retryable", () => { + //#given + const error = { message: "Provider is overloaded" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(true) - }) + //#then + expect(result).toBe(true) + }) - test("treats cooling-down auto-retry messages as retryable", () => { - //#given - const error = { - message: - "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~5 days attempt #1]", - } + test("treats cooling-down auto-retry messages as retryable", () => { + //#given + const error = { + message: + "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~5 days attempt #1]", + } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(true) - }) + //#then + expect(result).toBe(true) + }) - test("selectFallbackProvider prefers first connected provider in preference order", () => { - //#given - readConnectedProvidersCacheSpy?.mockReturnValue(["anthropic", "nvidia"]) + test("treats 'bad request' message as retryable (GitHub Copilot rolling update)", () => { + //#given + const error = { message: "400 Bad Request" } - //#when - const provider = selectFallbackProvider(["anthropic", "nvidia"], "nvidia") + //#when + const result = shouldRetryError(error) - //#then - expect(provider).toBe("anthropic") - }) + //#then + expect(result).toBe(true) + }) - test("selectFallbackProvider falls back to next connected provider when first is disconnected", () => { - //#given - readConnectedProvidersCacheSpy?.mockReturnValue(["nvidia"]) + test("treats 'bad request' lowercase as retryable", () => { + //#given + const error = { message: "bad request: model temporarily unavailable" } - //#when - const provider = selectFallbackProvider(["anthropic", "nvidia"]) + //#when + const result = shouldRetryError(error) - //#then - expect(provider).toBe("nvidia") - }) + //#then + expect(result).toBe(true) + }) - test("selectFallbackProvider uses provider preference order when cache is missing", () => { - //#given - no cache file + test("treats HTTP 429 rate limit message as retryable", () => { + //#given + const error = { message: "429 Too Many Requests: rate limit reached" } - //#when - const provider = selectFallbackProvider(["anthropic", "nvidia"], "nvidia") + //#when + const result = shouldRetryError(error) - //#then - expect(provider).toBe("anthropic") - }) + //#then + expect(result).toBe(true) + }) - test("selectFallbackProvider uses connected preferred provider when fallback providers are unavailable", () => { - //#given - readConnectedProvidersCacheSpy?.mockReturnValue(["provider-x"]) + test("triggers fallback for QuotaExceededError (provider exhausted, different provider may work)", () => { + //#given + const error = { name: "QuotaExceededError" } - //#when - const provider = selectFallbackProvider(["provider-y"], "provider-x") + //#when + const result = shouldRetryError(error) - //#then - expect(provider).toBe("provider-x") - }) + //#then + expect(result).toBe(true) + }) - test("treats QuotaExceededError (PascalCase name) as non-retryable STOP error", () => { - //#given - const error = { name: "QuotaExceededError" } + test("triggers fallback for InsufficientCreditsError", () => { + //#given + const error = { name: "InsufficientCreditsError" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats quotaexceedederror (lowercase name) as non-retryable STOP error", () => { - //#given - const error = { name: "quotaexceedederror" } + test("triggers fallback for FreeUsageLimitError", () => { + //#given + const error = { name: "FreeUsageLimitError" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats InsufficientCreditsError (PascalCase name) as non-retryable STOP error", () => { - //#given - const error = { name: "InsufficientCreditsError" } + test("triggers fallback for quota reset message", () => { + //#given + const error = { message: "quota will reset after 1 hour" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats insufficientcreditserror (lowercase name) as non-retryable STOP error", () => { - //#given - const error = { name: "insufficientcreditserror" } + test("triggers fallback for quota exceeded message", () => { + //#given + const error = { message: "quota exceeded for this billing period" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats FreeUsageLimitError (PascalCase name) as non-retryable STOP error", () => { - //#given - const error = { name: "FreeUsageLimitError" } + test("triggers fallback for 'usage limit has been reached' message", () => { + //#given + const error = { message: "The usage limit has been reached for your account" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats freeusagelimiterror (lowercase name) as non-retryable STOP error", () => { - //#given - const error = { name: "freeusagelimiterror" } + test("triggers fallback for insufficient credits message", () => { + //#given + const error = { message: "insufficient credits to complete this request" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats quota reset message as non-retryable STOP error (no error name)", () => { - //#given - const error = { message: "quota will reset after 1 hour" } + test("triggers fallback for subscription quota message", () => { + //#given + const error = { message: "Subscription quota exceeded. You can continue using free models." } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats quota exceeded message as non-retryable STOP error (no error name)", () => { - //#given - const error = { message: "quota exceeded for this billing period" } + test("triggers fallback for spending cap message", () => { + //#given + const error = { message: "Your project has exceeded its monthly spending cap" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(true) + }) - test("treats usage limit reached message as non-retryable STOP error (no error name)", () => { - //#given - const error = { message: "usage limit has been reached for your account" } + test("returns false for MessageAbortedError (non-retryable)", () => { + //#given + const error = { name: "MessageAbortedError" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(false) + }) - test("treats insufficient credits message as non-retryable STOP error (no error name)", () => { - //#given - const error = { message: "insufficient credits to complete this request" } + test("returns false for ContextLengthError (non-retryable)", () => { + //#given + const error = { name: "ContextLengthError" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(false) - }) + //#then + expect(result).toBe(false) + }) - test("treats 'bad request' message as retryable (GitHub Copilot rolling update)", () => { - //#given - const error = { message: "400 Bad Request" } + test("returns false for ValidationError (non-retryable)", () => { + //#given + const error = { name: "ValidationError" } - //#when - const result = shouldRetryError(error) + //#when + const result = shouldRetryError(error) - //#then - expect(result).toBe(true) + //#then + expect(result).toBe(false) + }) + + test("returns false for unknown error with no matching pattern", () => { + //#given + const error = { message: "some completely unrelated error" } + + //#when + const result = shouldRetryError(error) + + //#then + expect(result).toBe(false) + }) }) - test("treats 'bad request' lowercase as retryable", () => { - //#given - const error = { message: "bad request: model temporarily unavailable" } + describe("#isRetryableModelError", () => { + test("returns true for RateLimitError", () => { + //#given + const error = { name: "RateLimitError" } + + //#when + const result = isRetryableModelError(error) + + //#then + expect(result).toBe(true) + }) + + test("returns false for QuotaExceededError (STOP, not retryable on same provider)", () => { + //#given + const error = { name: "QuotaExceededError" } + + //#when + const result = isRetryableModelError(error) + + //#then + expect(result).toBe(false) + }) - //#when - const result = shouldRetryError(error) + test("returns false for usage limit message (STOP, not retryable on same provider)", () => { + //#given + const error = { message: "usage limit has been reached for your account" } - //#then - expect(result).toBe(true) + //#when + const result = isRetryableModelError(error) + + //#then + expect(result).toBe(false) + }) + + test("returns false for MessageAbortedError (non-retryable)", () => { + //#given + const error = { name: "MessageAbortedError" } + + //#when + const result = isRetryableModelError(error) + + //#then + expect(result).toBe(false) + }) }) - test("treats subscription quota message as non-retryable", () => { - //#given - const error = { message: "Subscription quota exceeded. You can continue using free models." } + describe("#isStopModelError", () => { + test("returns true for QuotaExceededError", () => { + //#given + const error = { name: "QuotaExceededError" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(true) + }) + + test("returns true for quotaexceedederror (lowercase)", () => { + //#given + const error = { name: "quotaexceedederror" } + + //#when + const result = isStopModelError(error) - //#when - const result = shouldRetryError(error) + //#then + expect(result).toBe(true) + }) - //#then - expect(result).toBe(false) + test("returns true for InsufficientCreditsError", () => { + //#given + const error = { name: "InsufficientCreditsError" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(true) + }) + + test("returns true for FreeUsageLimitError", () => { + //#given + const error = { name: "FreeUsageLimitError" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(true) + }) + + test("returns true for 'usage limit has been reached' message", () => { + //#given + const error = { message: "The usage limit has been reached" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(true) + }) + + test("returns true for quota exceeded message", () => { + //#given + const error = { message: "quota exceeded for this billing period" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(true) + }) + + test("returns true for spending cap message", () => { + //#given + const error = { message: "exceeded its monthly spending cap" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(true) + }) + + test("returns false for RateLimitError (retryable, not stop)", () => { + //#given + const error = { name: "RateLimitError" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(false) + }) + + test("returns false for MessageAbortedError (non-retryable, not stop)", () => { + //#given + const error = { name: "MessageAbortedError" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(false) + }) + + test("returns false for unknown error with no stop pattern", () => { + //#given + const error = { message: "some random error" } + + //#when + const result = isStopModelError(error) + + //#then + expect(result).toBe(false) + }) }) - test("treats HTTP 429 rate limit message as retryable", () => { - //#given - const error = { message: "429 Too Many Requests: rate limit reached" } + describe("#selectFallbackProvider", () => { + test("prefers first connected provider in preference order", () => { + //#given + readConnectedProvidersCacheSpy?.mockReturnValue(["anthropic", "nvidia"]) + + //#when + const provider = selectFallbackProvider(["anthropic", "nvidia"], "nvidia") + + //#then + expect(provider).toBe("anthropic") + }) + + test("falls back to next connected provider when first is disconnected", () => { + //#given + readConnectedProvidersCacheSpy?.mockReturnValue(["nvidia"]) + + //#when + const provider = selectFallbackProvider(["anthropic", "nvidia"]) + + //#then + expect(provider).toBe("nvidia") + }) + + test("uses provider preference order when cache is missing", () => { + //#given - no cache file + + //#when + const provider = selectFallbackProvider(["anthropic", "nvidia"], "nvidia") + + //#then + expect(provider).toBe("anthropic") + }) + + test("uses connected preferred provider when fallback providers are unavailable", () => { + //#given + readConnectedProvidersCacheSpy?.mockReturnValue(["provider-x"]) - //#when - const result = shouldRetryError(error) + //#when + const provider = selectFallbackProvider(["provider-y"], "provider-x") - //#then - expect(result).toBe(true) + //#then + expect(provider).toBe("provider-x") + }) }) }) diff --git a/src/shared/model-error-classifier.ts b/src/shared/model-error-classifier.ts index b20918d189..3362531992 100644 --- a/src/shared/model-error-classifier.ts +++ b/src/shared/model-error-classifier.ts @@ -48,6 +48,8 @@ const RETRYABLE_MESSAGE_PATTERNS = [ "insufficient", "too many requests", "over limit", + "hit your limit", + "hit the limit", "overloaded", "bad gateway", "bad request", @@ -81,7 +83,6 @@ const RETRYABLE_MESSAGE_PATTERNS = [ const STOP_MESSAGE_PATTERNS = [ "quota will reset after", "quota exceeded", - "usage limit has been reached", "free usage limit", "billing limit", "billing hard limit", @@ -96,7 +97,9 @@ const STOP_MESSAGE_PATTERNS = [ "insufficient balance", "credit balance", "usage limit for this month", + "usage limit has been reached", "exhausted your capacity", + "spending cap", ] const AUTO_RETRY_GATE_PATTERNS = [ @@ -153,11 +156,50 @@ export function isRetryableModelError(error: ErrorInfo): boolean { } /** - * Determines if an error should trigger a fallback retry. - * Returns true for deadstop errors that completely halt the action loop. + * Determines if an error indicates provider exhaustion (quota/billing/usage limits). + * These errors mean the current provider cannot serve requests, but a different provider might. + */ +export function isStopModelError(error: ErrorInfo): boolean { + if (error.name) { + const errorNameLower = error.name.toLowerCase() + if (STOP_ERROR_NAMES.has(errorNameLower)) { + return true + } + } + + const msg = error.message?.toLowerCase() ?? "" + return STOP_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern)) +} + +/** + * Determines if an error should trigger cross-provider fallback. + * Returns true for both retryable errors AND stop errors (provider exhaustion), + * since a different provider may still be able to serve the request. + * Returns false only for non-retryable errors (user aborts, validation, syntax) + * where no provider switch would help. */ export function shouldRetryError(error: ErrorInfo): boolean { - return isRetryableModelError(error) + if (error.name) { + const errorNameLower = error.name.toLowerCase() + if (NON_RETRYABLE_ERROR_NAMES.has(errorNameLower)) { + return false + } + if (RETRYABLE_ERROR_NAMES.has(errorNameLower) || STOP_ERROR_NAMES.has(errorNameLower)) { + return true + } + } + + const msg = error.message?.toLowerCase() ?? "" + + if (STOP_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern))) { + return true + } + + if (hasProviderAutoRetrySignal(msg)) { + return true + } + + return RETRYABLE_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern)) } /**