diff --git a/src/docs2vecs/subcommands/indexer/skills/bedrock_titan_embedding_skill.py b/src/docs2vecs/subcommands/indexer/skills/bedrock_titan_embedding_skill.py index bea5642..db3418b 100644 --- a/src/docs2vecs/subcommands/indexer/skills/bedrock_titan_embedding_skill.py +++ b/src/docs2vecs/subcommands/indexer/skills/bedrock_titan_embedding_skill.py @@ -3,11 +3,24 @@ from typing import List, Optional import boto3 +from botocore.exceptions import BotoCoreError, ClientError from docs2vecs.subcommands.indexer.config.config import Config from docs2vecs.subcommands.indexer.document.document import Document from docs2vecs.subcommands.indexer.skills.skill import IndexerSkill +# Bedrock error codes that represent transient failures worth retrying. +# Permanent errors (ValidationException, AccessDeniedException, +# ResourceNotFoundException, etc.) surface immediately. +_RETRYABLE_BEDROCK_CODES = frozenset({ + "ThrottlingException", + "TooManyRequestsException", + "ServiceUnavailableException", + "InternalServerException", + "ModelTimeoutException", + "ModelStreamErrorException", +}) + class BedrockTitanEmbeddingSkill(IndexerSkill): DEFAULT_MODEL_ID = "amazon.titan-embed-text-v2:0" @@ -27,6 +40,14 @@ def __init__(self, config: dict, global_config: Config): region_name=self._config.get("region"), ) + def _is_retryable(self, exc: Exception) -> bool: + if isinstance(exc, ClientError): + code = exc.response.get("Error", {}).get("Code", "") + status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 0) + return code in _RETRYABLE_BEDROCK_CODES or 500 <= status < 600 + # BotoCoreError covers connection/read timeouts and other transport-level issues + return isinstance(exc, BotoCoreError) + def _embed_text(self, content: str, chunk_id=None): self.logger.debug( f"Requesting Bedrock embedding for chunk_id={chunk_id}, content_length={len(content)}" @@ -51,8 +72,8 @@ def _embed_text(self, content: str, chunk_id=None): f"Successfully received embedding for chunk_id={chunk_id}, embedding_dim={len(embedding) if embedding else 0}" ) return embedding - except Exception as exc: - if attempt == self._max_retries - 1: + except (ClientError, BotoCoreError) as exc: + if attempt == self._max_retries - 1 or not self._is_retryable(exc): raise wait = self._retry_backoff * (attempt + 1) self.logger.warning(