From 58babed49ad8a74bf38e78782a0de721d58be8e0 Mon Sep 17 00:00:00 2001 From: GeJiaXiang <353358601@qq.com> Date: Wed, 11 Mar 2026 16:42:19 +0800 Subject: [PATCH 1/3] feat: add per-credential response-header-timeout and treat 524 as transient error - Add ResponseHeaderTimeout config field to ClaudeKey for per-upstream timeout control (in seconds). Only limits wait for first response header; streaming responses are unaffected. - Pass timeout value through Auth.Attributes to executor layer. - Apply ResponseHeaderTimeout to http.Transport in proxy_helpers.go, covering proxy, context-RoundTripper, and bare-transport code paths. - Add 524 (Cloudflare timeout) to transient error list in conductor.go. - Wrap Go net.Error timeout as statusErr{code: 504} in claude_executor.go so conductor properly applies 1-minute cooldown on timeout failures, preventing repeated 15s waits on the same failing upstream. --- internal/config/config.go | 5 ++++ internal/runtime/executor/claude_executor.go | 7 +++++ internal/runtime/executor/proxy_helpers.go | 28 ++++++++++++++++++++ internal/watcher/synthesizer/config.go | 3 +++ sdk/cliproxy/auth/conductor.go | 2 +- 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index 5a6595f778..6ac945fc84 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -339,6 +339,11 @@ type ClaudeKey struct { // ExcludedModels lists model IDs that should be excluded for this provider. ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` + // ResponseHeaderTimeout limits how long to wait for the upstream to start + // responding (in seconds). Once the first response byte arrives, this timeout + // no longer applies — streaming responses are not affected. 0 means no timeout. + ResponseHeaderTimeout int `yaml:"response-header-timeout,omitempty" json:"response-header-timeout,omitempty"` + // Cloak configures request cloaking for non-Claude-Code clients. Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"` } diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 82b12a2f80..9baeec28cb 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -12,6 +12,7 @@ import ( "encoding/json" "fmt" "io" + "net" "net/http" "net/textproto" "runtime" @@ -183,6 +184,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + return resp, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)} + } return resp, err } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) @@ -346,6 +350,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + return nil, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)} + } return nil, err } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) diff --git a/internal/runtime/executor/proxy_helpers.go b/internal/runtime/executor/proxy_helpers.go index ab0f626acc..3d721356e3 100644 --- a/internal/runtime/executor/proxy_helpers.go +++ b/internal/runtime/executor/proxy_helpers.go @@ -5,6 +5,7 @@ import ( "net" "net/http" "net/url" + "strconv" "strings" "time" @@ -33,6 +34,16 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip httpClient.Timeout = timeout } + // Read per-auth response header timeout from attributes. + var respHeaderTimeout time.Duration + if auth != nil && auth.Attributes != nil { + if v, ok := auth.Attributes["response_header_timeout"]; ok { + if secs, err := strconv.Atoi(v); err == nil && secs > 0 { + respHeaderTimeout = time.Duration(secs) * time.Second + } + } + } + // Priority 1: Use auth.ProxyURL if configured var proxyURL string if auth != nil { @@ -48,6 +59,9 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip if proxyURL != "" { transport := buildProxyTransport(proxyURL) if transport != nil { + if respHeaderTimeout > 0 { + transport.ResponseHeaderTimeout = respHeaderTimeout + } httpClient.Transport = transport return httpClient } @@ -60,6 +74,20 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip httpClient.Transport = rt } + // Apply ResponseHeaderTimeout if set. + if respHeaderTimeout > 0 { + if transport, ok := httpClient.Transport.(*http.Transport); ok { + // Clone the transport to avoid modifying a shared instance. + clonedTransport := transport.Clone() + clonedTransport.ResponseHeaderTimeout = respHeaderTimeout + httpClient.Transport = clonedTransport + } else if httpClient.Transport == nil { + httpClient.Transport = &http.Transport{ + ResponseHeaderTimeout: respHeaderTimeout, + } + } + } + return httpClient } diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go index 52ae9a4808..ca1729bff1 100644 --- a/internal/watcher/synthesizer/config.go +++ b/internal/watcher/synthesizer/config.go @@ -116,6 +116,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea if hash := diff.ComputeClaudeModelsHash(ck.Models); hash != "" { attrs["models_hash"] = hash } + if ck.ResponseHeaderTimeout > 0 { + attrs["response_header_timeout"] = strconv.Itoa(ck.ResponseHeaderTimeout) + } addConfigHeadersToAttrs(ck.Headers, attrs) proxyURL := strings.TrimSpace(ck.ProxyURL) a := &coreauth.Auth{ diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index b29e04db8c..e6dcf93a0f 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1942,7 +1942,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati } auth.Quota.NextRecoverAt = next auth.NextRetryAfter = next - case 408, 500, 502, 503, 504: + case 408, 500, 502, 503, 504, 524: auth.StatusMessage = "transient upstream error" if quotaCooldownDisabledForAuth(auth) { auth.NextRetryAfter = time.Time{} From 6ae0c35286ca07d093665b003501d6b5748854b6 Mon Sep 17 00:00:00 2001 From: GeJiaXiang <353358601@qq.com> Date: Wed, 11 Mar 2026 18:21:31 +0800 Subject: [PATCH 2/3] feat: add per-credential transient-error-cooldown config field --- internal/config/config.go | 5 +++++ internal/watcher/synthesizer/config.go | 3 +++ sdk/cliproxy/auth/conductor.go | 10 +++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index 6ac945fc84..d7b10afec2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -344,6 +344,11 @@ type ClaudeKey struct { // no longer applies — streaming responses are not affected. 0 means no timeout. ResponseHeaderTimeout int `yaml:"response-header-timeout,omitempty" json:"response-header-timeout,omitempty"` + // TransientErrorCooldown overrides the default 1-minute cooldown applied when + // a transient error (408/500/502/503/504/524) is received from this upstream + // (in seconds). 0 means use the default (60s). + TransientErrorCooldown int `yaml:"transient-error-cooldown,omitempty" json:"transient-error-cooldown,omitempty"` + // Cloak configures request cloaking for non-Claude-Code clients. Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"` } diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go index ca1729bff1..ad36edf724 100644 --- a/internal/watcher/synthesizer/config.go +++ b/internal/watcher/synthesizer/config.go @@ -119,6 +119,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea if ck.ResponseHeaderTimeout > 0 { attrs["response_header_timeout"] = strconv.Itoa(ck.ResponseHeaderTimeout) } + if ck.TransientErrorCooldown > 0 { + attrs["transient_error_cooldown"] = strconv.Itoa(ck.TransientErrorCooldown) + } addConfigHeadersToAttrs(ck.Headers, attrs) proxyURL := strings.TrimSpace(ck.ProxyURL) a := &coreauth.Auth{ diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index e6dcf93a0f..1fddde1716 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1947,7 +1947,15 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati if quotaCooldownDisabledForAuth(auth) { auth.NextRetryAfter = time.Time{} } else { - auth.NextRetryAfter = now.Add(1 * time.Minute) + cooldown := 1 * time.Minute + if auth.Attributes != nil { + if v, ok := auth.Attributes["transient_error_cooldown"]; ok { + if secs, err := strconv.Atoi(v); err == nil && secs > 0 { + cooldown = time.Duration(secs) * time.Second + } + } + } + auth.NextRetryAfter = now.Add(cooldown) } default: if auth.StatusMessage == "" { From 0c70af48f0bf6635956ecf476fbfd6015ef88174 Mon Sep 17 00:00:00 2001 From: GeJiaXiang <353358601@qq.com> Date: Thu, 12 Mar 2026 09:58:16 +0800 Subject: [PATCH 3/3] fix: cover 524 in model-scoped branch and use DefaultTransport.Clone() - Add 524 to transient error list in MarkResult model-scoped branch - Add transient_error_cooldown config support in model-scoped branch - Use http.DefaultTransport.Clone() instead of zero-value Transport Fixes code review feedback from PR #2060 --- internal/runtime/executor/proxy_helpers.go | 6 +++--- sdk/cliproxy/auth/conductor.go | 12 ++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/internal/runtime/executor/proxy_helpers.go b/internal/runtime/executor/proxy_helpers.go index 3d721356e3..7bc8b9575c 100644 --- a/internal/runtime/executor/proxy_helpers.go +++ b/internal/runtime/executor/proxy_helpers.go @@ -82,9 +82,9 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip clonedTransport.ResponseHeaderTimeout = respHeaderTimeout httpClient.Transport = clonedTransport } else if httpClient.Transport == nil { - httpClient.Transport = &http.Transport{ - ResponseHeaderTimeout: respHeaderTimeout, - } + transport := http.DefaultTransport.(*http.Transport).Clone() + transport.ResponseHeaderTimeout = respHeaderTimeout + httpClient.Transport = transport } } diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 1fddde1716..c087a9937b 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1665,11 +1665,19 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) { suspendReason = "quota" shouldSuspendModel = true setModelQuota = true - case 408, 500, 502, 503, 504: + case 408, 500, 502, 503, 504, 524: if quotaCooldownDisabledForAuth(auth) { state.NextRetryAfter = time.Time{} } else { - next := now.Add(1 * time.Minute) + cooldown := 1 * time.Minute + if auth.Attributes != nil { + if v, ok := auth.Attributes["transient_error_cooldown"]; ok { + if secs, err := strconv.Atoi(v); err == nil && secs > 0 { + cooldown = time.Duration(secs) * time.Second + } + } + } + next := now.Add(cooldown) state.NextRetryAfter = next } default: