Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/5868.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Handle Storage Proxy connection errors on the Manager side
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
from ai.backend.manager.clients.storage_proxy.base import StorageProxyHTTPClient
from ai.backend.manager.decorators.client_decorator import create_layer_aware_client_decorator
from ai.backend.manager.defs import DEFAULT_CHUNK_SIZE
from ai.backend.manager.errors.storage import UnexpectedStorageProxyResponseError
from ai.backend.manager.errors.storage import (
StorageProxyConnectionError,
UnexpectedStorageProxyResponseError,
)

client_decorator = create_layer_aware_client_decorator(LayerType.STORAGE_PROXY_CLIENT)

Expand All @@ -57,7 +60,10 @@ async def get_volumes(self) -> Mapping[str, Any]:

:return: Response containing volume information
"""
return await self._client.request_with_response("GET", "volumes")
try:
return await self._client.request_with_response("GET", "volumes")
except aiohttp.ClientConnectionError as e:
raise StorageProxyConnectionError from e

@client_decorator()
async def create_folder(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)
from ai.backend.manager.config.unified import VolumesConfig
from ai.backend.manager.errors.storage import (
StorageProxyConnectionError,
StorageProxyNotFound,
)

Expand Down Expand Up @@ -177,7 +178,11 @@ async def _fetch(
proxy_name: str,
client: StorageProxyManagerFacingClient,
) -> Iterable[tuple[str, VolumeInfo]]:
reply = await client.get_volumes()
try:
reply = await client.get_volumes()
except StorageProxyConnectionError:
log.warning("Failed to connect to storage proxy (name: {})", proxy_name)
return []
Comment on lines +181 to +185
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem like the right way to consume the error. Is there a reason you implemented it this way?

return ((proxy_name, volume_data) for volume_data in reply["volumes"])

for proxy_name, client in self._manager_facing_clients.items():
Expand Down
13 changes: 13 additions & 0 deletions src/ai/backend/manager/errors/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,19 @@ def error_code(cls) -> ErrorCode:
)


class StorageProxyConnectionError(BackendAIError, web.HTTPClientError):
Copy link

Copilot AI Sep 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The HTTP status code should be 503 Service Unavailable (web.HTTPServiceUnavailable) instead of a 4xx client error (web.HTTPClientError, the base class for 4xx responses), since a connection failure indicates that the service is temporarily unavailable, not that the client's request was invalid.

Suggested change
class StorageProxyConnectionError(BackendAIError, web.HTTPClientError):
class StorageProxyConnectionError(BackendAIError, web.HTTPServiceUnavailable):

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems this should not be a ClientError, because the failure is a server-side connection error rather than a problem with the client's request.

error_type = "https://api.backend.ai/probs/storage-proxy-connection-error"
error_title = "Failed to connect to the storage proxy."

@classmethod
def error_code(cls) -> ErrorCode:
return ErrorCode(
domain=ErrorDomain.STORAGE_PROXY,
operation=ErrorOperation.REQUEST,
error_detail=ErrorDetail.UNREACHABLE,
)


class UnexpectedStorageProxyResponseError(BackendAIError, web.HTTPInternalServerError):
error_type = "https://api.backend.ai/probs/unexpected-storage-proxy-response"
error_title = "Unexpected response from storage proxy."
Expand Down
Loading