Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 42 additions & 24 deletions snakemake_storage_plugin_s3/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import enum
from dataclasses import dataclass, field
from typing import Iterable, List, Optional
from urllib.parse import urlparse
Expand Down Expand Up @@ -207,6 +208,12 @@ def is_valid_query(cls, query: str) -> StorageQueryValidationResult:
)


class _FileType(enum.Enum):
    """Cached classification of an S3 query.

    UNKNOWN until the object has been probed, then either a regular S3
    object (FILE) or a key prefix acting as a directory (DIRECTORY).
    """

    UNKNOWN = enum.auto()
    FILE = enum.auto()
    DIRECTORY = enum.auto()


# Required:
# Implementation of storage object. If read-only storage (e.g. see
# snakemake-storage-http for comparison), inherit from StorageObjectRead instead.
Expand All @@ -226,7 +233,7 @@ def __post_init__(self):
# (which is invalid for S3 keys).
self.key = posixpath.normpath(parsed.path.lstrip("/"))
self._local_suffix = self._local_suffix_from_key(self.key)
self._is_dir = None
self._file_type = _FileType.UNKNOWN

def s3obj(self, subkey: Optional[str] = ""):
if subkey:
Expand Down Expand Up @@ -278,25 +285,7 @@ def cleanup(self):
# Fallible methods should implement some retry logic.
# Here we simply rely on botos retry logic.
def exists(self) -> bool:
    """Return True if the storage object exists on S3.

    An object "exists" either as a regular S3 object (file) or as a
    "directory", i.e. a key prefix with at least one subkey. Both
    checks cache their result in ``self._file_type``.

    Raises:
        WorkflowError: propagated from ``is_file()`` on a 400 (bad
            request) response, e.g. unsupported characters in the key,
            invalid credentials, or insufficient permissions.
    """
    # Fallible methods should implement some retry logic.
    # Here we simply rely on boto's retry logic.
    return self.is_file() or self.is_dir()

def mtime(self) -> float:
# return the modification time
Expand Down Expand Up @@ -325,10 +314,38 @@ def retrieve_object(self):
else:
self.s3obj().download_file(self.local_path())

def is_file(self) -> bool:
    """Return True if the query points at an existing regular S3 object.

    The classification is cached in ``self._file_type`` so the HEAD
    request is issued at most once per object.

    Raises:
        WorkflowError: on a 400 (bad request) response, e.g. unsupported
            characters in the key (such as '..'), invalid credentials,
            or insufficient permissions.
    """
    # Serve from the cache if a previous call already classified this
    # object (as FILE or DIRECTORY).
    if self._file_type != _FileType.UNKNOWN:
        return self._file_type == _FileType.FILE

    try:
        # HEAD request; raises ClientError if the object is missing or
        # inaccessible.
        self.s3obj().load()
    except botocore.exceptions.ClientError as e:
        err_code = e.response["Error"]["Code"]
        if err_code == "400":
            raise WorkflowError(
                f"Bad request for S3 object {self.query}. This can happen if "
                "the query contains unsupported characters (e.g. '..'), "
                "if your credentials are invalid, or if your permissions are "
                "insufficient."
            ) from e
        elif err_code in ("404", "NoSuchKey"):
            # Missing objects surface as the HTTP status "404" or the
            # semantic S3 error code "NoSuchKey" depending on the call
            # path; both mean "not a file". Note: the negative result is
            # deliberately not cached, because the key could still be a
            # directory prefix (see is_dir()).
            return False
        else:
            # Bare raise preserves the original traceback for
            # unexpected errors.
            raise

    self._file_type = _FileType.FILE
    return True

def is_dir(self) -> bool:
    """Return True if the query refers to a "directory" on S3.

    S3 has no real directories; a key prefix counts as a directory when
    the bucket exists and at least one subkey lives below the prefix.
    A positive result is cached in ``self._file_type``; a negative one
    is not, since the bucket or subkeys could appear later.
    """
    # Serve from the cache if a previous call already classified this
    # object (as FILE or DIRECTORY).
    if self._file_type != _FileType.UNKNOWN:
        return self._file_type == _FileType.DIRECTORY

    if not (self.bucket_exists() and any(self.get_subkeys())):
        return False

    self._file_type = _FileType.DIRECTORY
    return True

def get_subkeys(self):
prefix = self.s3obj().key + "/"
Expand All @@ -355,13 +372,14 @@ def store_object(self):
self.provider.s3c.create_bucket(**create_bucket_params)

if self.local_path().is_dir():
self._is_dir = True
self._file_type = _FileType.DIRECTORY
for item in self.local_path().rglob("*"):
if item.is_file():
self.s3obj(subkey=item.relative_to(self.local_path())).upload_file(
item
)
else:
self._file_type = _FileType.FILE
self.s3obj().upload_file(self.local_path())

def remove(self):
Expand Down