Skip to content

Commit 5acff18

Browse files
author
Cell
committed
fix: collapse multiplicative 429 retry explosion (45→15)
When a RetryingAsyncClient is used as http_client for the OpenAI SDK, the SDK's built-in retries (max_retries=2) and our own retries (max_retries=5) both independently retry 429s, multiplying with the streaming retry layer (3 attempts) to produce up to 45 retries per rate-limit event. Add a disable_openai_sdk_retries() helper that constructs an AsyncOpenAI(max_retries=0) and returns it as openai_client when a RetryingAsyncClient is detected, collapsing the 3-layer nesting to 2. Before: 3 streaming × 3 SDK × 5 HTTP = 45 retries. After: 3 streaming × 5 HTTP = 15 retries. Applies to the custom_openai, cerebras, and copilot_auth providers. Falls back to http_client mode with a warning if AsyncOpenAI construction fails (e.g. missing api_key).
1 parent a49c5ab commit 5acff18

4 files changed

Lines changed: 148 additions & 13 deletions

File tree

code_puppy/http_utils.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66

77
import asyncio
8+
import logging
89
import os
910
import socket
1011
import time
@@ -17,6 +18,8 @@
1718
import requests
1819
from code_puppy.config import get_http2
1920

21+
logger = logging.getLogger(__name__)
22+
2023

2124
@dataclass
2225
class ProxyConfig:
@@ -340,6 +343,63 @@ def create_reopenable_async_client(
340343
return httpx.AsyncClient(**base_kwargs)
341344

342345

346+
def disable_openai_sdk_retries(
    http_client: httpx.AsyncClient,
    **openai_kwargs: Any,
) -> dict:
    """Build provider kwargs that avoid stacking OpenAI SDK retries on ours.

    When a RetryingAsyncClient is handed to the OpenAI SDK as ``http_client``,
    the SDK's own retries (default ``max_retries=2``) multiply with the
    client's retries (5) and the streaming retry layer (3 attempts): a single
    429 can trigger up to 3 x 3 x 5 = 45 retries. Constructing the SDK client
    ourselves with ``max_retries=0`` caps this at 3 x 5 = 15.

    Args:
        http_client: The httpx client (possibly a RetryingAsyncClient).
        **openai_kwargs: Extra kwargs for AsyncOpenAI (api_key, base_url, etc).
            They are folded into the AsyncOpenAI instance when one is built,
            and returned as separate keys otherwise.

    Returns:
        ``{"openai_client": AsyncOpenAI(...)}`` when ``http_client`` is a
        RetryingAsyncClient and the SDK client could be constructed.
        Otherwise ``{"http_client": http_client, **openai_kwargs}`` -- this
        covers plain clients, a missing ``openai`` package, and construction
        failures (e.g. missing api_key).
    """
    # Same mapping for every non-"openai_client" outcome. Keys in
    # openai_kwargs take precedence, matching dict.update() semantics.
    fallback = {"http_client": http_client, **openai_kwargs}

    if not isinstance(http_client, RetryingAsyncClient):
        return fallback

    # Import separately from construction so that an ImportError raised while
    # *building* the client is not mistaken for "openai is not installed".
    try:
        from openai import AsyncOpenAI, OpenAIError
    except ImportError:
        logger.debug(
            "openai package not importable; leaving OpenAI SDK retries enabled"
        )
        return fallback

    try:
        openai_client = AsyncOpenAI(
            http_client=http_client,
            max_retries=0,
            **openai_kwargs,
        )
    except (TypeError, ValueError, OpenAIError) as exc:
        # Missing api_key (OpenAIError), wrong kwargs (TypeError), or other
        # expected construction failures -- tell the user and fall back.
        emit_warning(
            f"Could not disable OpenAI SDK retries ({exc}). "
            "Falling back to http_client mode — multiplicative retries possible."
        )
    except Exception as exc:  # pragma: no cover
        # Best-effort helper: never let an unexpected failure break model
        # creation, but keep the traceback available for diagnosis.
        logger.debug(
            "Unexpected error disabling OpenAI SDK retries: %s", exc, exc_info=True
        )
    else:
        return {"openai_client": openai_client}

    return fallback
401+
402+
343403
def is_cert_bundle_available() -> bool:
344404
cert_path = get_cert_bundle_path()
345405
if cert_path is None:

code_puppy/model_factory.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,12 @@
2525
from . import callbacks
2626
from .claude_cache_client import ClaudeCacheAsyncClient, patch_anthropic_client_messages
2727
from .config import EXTRA_MODELS_FILE, get_value, get_yolo_mode
28-
from .http_utils import create_async_client, get_cert_bundle_path, get_http2
28+
from .http_utils import (
29+
create_async_client,
30+
disable_openai_sdk_retries,
31+
get_cert_bundle_path,
32+
get_http2,
33+
)
2934
from .provider_identity import (
3035
make_anthropic_provider,
3136
make_openai_provider,
@@ -700,11 +705,20 @@ def get_model(model_name: str, config: Dict[str, Any]) -> Any:
700705
verify=verify,
701706
timeout=timeout if timeout is not None else 180,
702707
)
703-
provider_args = {"base_url": url}
704708
if isinstance(client, httpx.AsyncClient):
705-
provider_args["http_client"] = client
706-
if api_key:
707-
provider_args["api_key"] = api_key
709+
# Disable OpenAI SDK retries when using our own
710+
# RetryingAsyncClient to avoid multiplicative retry explosion
711+
# (3 streaming x 3 SDK x 5 HTTP = 45 retries on 429)
712+
openai_kwargs = {}
713+
if url:
714+
openai_kwargs["base_url"] = url
715+
if api_key:
716+
openai_kwargs["api_key"] = api_key
717+
provider_args = disable_openai_sdk_retries(client, **openai_kwargs)
718+
else:
719+
provider_args = {"base_url": url}
720+
if api_key:
721+
provider_args["api_key"] = api_key
708722
provider = make_openai_provider(provider_identity, **provider_args)
709723
model = OpenAIChatModel(model_name=model_config["name"], provider=provider)
710724
if model_name == "chatgpt-gpt-5-codex":
@@ -791,10 +805,10 @@ def model_profile(self, model_name: str) -> ModelProfile | None:
791805
model_name="cerebras",
792806
timeout=timeout if timeout is not None else 180,
793807
)
794-
provider_args = dict(
795-
api_key=api_key,
796-
http_client=client,
797-
)
808+
# Disable OpenAI SDK retries when using our own
809+
# RetryingAsyncClient to avoid multiplicative retry explosion
810+
# (3 streaming x 3 SDK x 5 HTTP = 45 retries on 429)
811+
provider_args = disable_openai_sdk_retries(client, api_key=api_key)
798812
provider = ZaiCerebrasProvider(**provider_args)
799813

800814
return OpenAIChatModel(model_name=model_config["name"], provider=provider)

code_puppy/plugins/copilot_auth/register_callbacks.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def _create_copilot_model(model_name: str, model_config: Dict, config: Dict) ->
358358
from pydantic_ai.models.openai import OpenAIChatModel
359359
from pydantic_ai.providers.openai import OpenAIProvider
360360

361-
from code_puppy.http_utils import create_async_client
361+
from code_puppy.http_utils import create_async_client, disable_openai_sdk_retries
362362

363363
# Discover token — match against the host stored in the model config
364364
host = model_config.get("copilot_host", "github.com")
@@ -414,12 +414,14 @@ def auth_flow(self, request: httpx.Request):
414414
if config_url:
415415
base_url = config_url
416416

417-
# Use a placeholder API key — the actual token is injected by _CopilotAuth
418-
provider = OpenAIProvider(
417+
# Disable OpenAI SDK retries when using our own RetryingAsyncClient
418+
# to avoid multiplicative retry explosion (3 streaming x 3 SDK x 5 HTTP = 45)
419+
provider_kwargs = disable_openai_sdk_retries(
420+
client,
419421
api_key="copilot-session-managed",
420422
base_url=base_url,
421-
http_client=client,
422423
)
424+
provider = OpenAIProvider(**provider_kwargs)
423425

424426
# Build a model profile that tells pydantic-ai how to handle thinking.
425427
# Claude models behind the Copilot API return thinking in a custom field

tests/test_http_utils.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import os
1313
from unittest.mock import patch
1414

15+
import httpx
16+
1517
from code_puppy.http_utils import ProxyConfig
1618

1719

@@ -370,3 +372,60 @@ def test_find_available_port_multiple_calls(self):
370372
# Both should be valid ports
371373
assert isinstance(port1, int) and isinstance(port2, int)
372374
assert port1 > 0 and port2 > 0
375+
376+
377+
class TestDisableOpenAISdkRetries:
    """Behavioral checks for the disable_openai_sdk_retries helper."""

    def test_plain_client_returns_http_client(self):
        """A non-retrying httpx.AsyncClient is returned under "http_client"."""
        from code_puppy.http_utils import disable_openai_sdk_retries

        plain = httpx.AsyncClient()
        assert disable_openai_sdk_retries(plain) == {"http_client": plain}

    def test_plain_client_passes_openai_kwargs(self):
        """Extra kwargs come back as their own keys next to "http_client"."""
        from code_puppy.http_utils import disable_openai_sdk_retries

        plain = httpx.AsyncClient()
        provider_kwargs = disable_openai_sdk_retries(
            plain, api_key="test-key", base_url="https://example.com"
        )
        assert provider_kwargs["http_client"] is plain
        assert provider_kwargs["api_key"] == "test-key"
        assert provider_kwargs["base_url"] == "https://example.com"

    def test_retrying_client_creates_openai_client(self):
        """A RetryingAsyncClient yields an openai_client with max_retries=0."""
        from code_puppy.http_utils import (
            RetryingAsyncClient,
            disable_openai_sdk_retries,
        )

        retrying = RetryingAsyncClient(max_retries=5)
        provider_kwargs = disable_openai_sdk_retries(
            retrying, api_key="test-key", base_url="https://example.com"
        )
        assert "openai_client" in provider_kwargs
        # The raw http_client key is replaced by the prebuilt SDK client.
        assert "http_client" not in provider_kwargs
        assert provider_kwargs["openai_client"].max_retries == 0

    def test_retrying_client_falls_back_on_missing_api_key(self):
        """A failed AsyncOpenAI construction falls back to http_client mode."""
        from code_puppy.http_utils import (
            RetryingAsyncClient,
            disable_openai_sdk_retries,
        )

        retrying = RetryingAsyncClient(max_retries=5)
        # With no api_key argument and an emptied environment (so no
        # OPENAI_API_KEY), AsyncOpenAI construction is expected to fail.
        with patch.dict(os.environ, {}, clear=True), patch(
            "code_puppy.http_utils.emit_warning"
        ) as warn_mock:
            provider_kwargs = disable_openai_sdk_retries(retrying)

        assert "http_client" in provider_kwargs
        assert provider_kwargs["http_client"] is retrying
        # Exactly one warning, and it mentions the multiplicative-retry risk.
        warn_mock.assert_called_once()
        assert "multiplicative" in warn_mock.call_args[0][0].lower()

0 commit comments

Comments
 (0)