Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 83 additions & 72 deletions Cachyos/Scripts/WIP/gh/git-fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,86 @@
raise urllib.error.URLError(f"{e} (url: {url})") from e


# Timeout (seconds) for HTTPS connections; previously a magic number
# repeated at every reconnect site in this function.
_HTTP_TIMEOUT = 30


def _reconnect(
    conn: http.client.HTTPSConnection, host: str
) -> http.client.HTTPSConnection:
    """Close *conn* and return a fresh HTTPS connection to *host*."""
    conn.close()
    return http.client.HTTPSConnection(host, timeout=_HTTP_TIMEOUT)


def process_single_download(
    conn: http.client.HTTPSConnection,
    host: str,
    url_path: str,
    local_path: Path,
    display_path: str,
    headers: dict[str, str],
) -> http.client.HTTPSConnection:
    """Download one file over a persistent HTTPS connection, with retries.

    Issues a GET for *url_path* on *conn* and streams a 200 response body to
    *local_path* in 64 KiB chunks (creating parent directories as needed).
    Redirects (301/302/307/308) are reported but not followed; 401/403/404
    fail fast; 5xx and 429 are retried up to 3 attempts.  A server-side
    ``Connection: close`` header is honored by reopening the connection so
    it remains usable for the next queued file.

    Returns the (possibly replaced) connection the caller should keep using.
    """
    retries = 3
    while retries > 0:
        try:
            conn.request("GET", url_path, headers=headers)
            resp = conn.getresponse()

            # Check if the server wants to close the connection.
            should_close = resp.getheader("Connection", "").lower() == "close"

            if resp.status == 200:
                # Create parent directory to avoid FileNotFoundError.
                local_path.parent.mkdir(parents=True, exist_ok=True)
                with open(local_path, "wb") as f:
                    while chunk := resp.read(65536):
                        f.write(chunk)
                print(f"✓ {display_path}")
                if should_close:
                    conn = _reconnect(conn, host)
                break

            if resp.status in (301, 302, 307, 308):
                loc = resp.getheader("Location")
                resp.read()  # Consume body so the connection can be reused.
                if loc:
                    print(f"✗ {display_path}: Redirect to {loc} not handled in persistent mode")
                else:
                    print(f"✗ {display_path}: HTTP {resp.status}")
                if should_close:
                    conn = _reconnect(conn, host)
                break

            print(f"✗ {display_path}: HTTP {resp.status}")
            resp.read()  # Consume body so the connection can be reused.
            if should_close:
                conn = _reconnect(conn, host)

            # Non-retriable client errors: fail fast.
            if resp.status in (401, 403, 404):
                break

            # Retry on transient server errors and rate limiting.
            if 500 <= resp.status < 600 or resp.status == 429:
                retries -= 1
                if retries > 0:
                    continue
                break  # Out of retries, give up.

            # Default: treat other statuses as non-retriable.
            break
        except (http.client.HTTPException, OSError) as e:
            # Connection might have been closed by the server unexpectedly.
            conn.close()
            retries -= 1
            if retries > 0:
                conn = http.client.HTTPSConnection(host, timeout=_HTTP_TIMEOUT)
            else:
                # NOTE(review): on final failure the returned connection is
                # closed; the caller's next request will raise once and then
                # recover through this same reconnect path.
                print(f"✗ {display_path}: {e}")

    return conn
Comment on lines +95 to +172
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

While this refactoring is a good step in reducing complexity in download_worker, the extracted process_single_download function itself has some maintainability issues that could be improved:

  1. Magic Number: The timeout value 30 is hardcoded in multiple places (here and elsewhere in the file). This should be defined as a module-level constant, e.g., HTTP_TIMEOUT = 30, for better maintainability.

  2. Duplicated Logic (DRY violation): The logic for closing and reopening a connection when should_close is true is repeated three times within this function.

    if should_close:
      conn.close()
      conn = http.client.HTTPSConnection(host, timeout=30)

    This could be extracted into a small, private helper function (e.g., _reconnect(conn, host)) to reduce code duplication and make the intent clearer.



def download_worker(host: str, file_q: queue.Queue, headers: dict[str, str]) -> None:
"""Worker thread: process files from queue using a persistent connection."""
# Make a per-thread copy so we don't mutate a shared headers dict.
Expand All @@ -111,78 +191,9 @@

try:
url_path, local_path, display_path = item

# Process download
retries = 3
while retries > 0:
try:
conn.request("GET", url_path, headers=headers)
resp = conn.getresponse()

# Check if the server wants to close the connection
connection_header = resp.getheader("Connection", "").lower()
should_close = connection_header == "close"

if resp.status == 200:
# Create parent directory to avoid FileNotFoundError
local_path.parent.mkdir(parents=True, exist_ok=True)
with open(local_path, "wb") as f:
while True:
chunk = resp.read(65536)
if not chunk:
break
f.write(chunk)
print(f"✓ {display_path}")

if should_close:
conn.close()
# Reconnect for the next file in the queue
conn = http.client.HTTPSConnection(host, timeout=30)

break
elif resp.status in (301, 302, 307, 308):
loc = resp.getheader("Location")
resp.read() # Consume body
if loc:
print(
f"✗ {display_path}: Redirect to {loc} not handled in persistent mode"
)
else:
print(f"✗ {display_path}: HTTP {resp.status}")

if should_close:
conn.close()
conn = http.client.HTTPSConnection(host, timeout=30)
break
else:
print(f"✗ {display_path}: HTTP {resp.status}")
resp.read() # Consume body
if should_close:
conn.close()
conn = http.client.HTTPSConnection(host, timeout=30)

# Non-retriable client errors: fail fast
if resp.status in (401, 403, 404):
break

# Retry on transient server errors and rate limiting
if 500 <= resp.status < 600 or resp.status == 429:
retries -= 1
if retries > 0:
continue
# Out of retries, give up
break

# Default: treat other statuses as non-retriable
break
except (http.client.HTTPException, OSError) as e:
# Connection might have been closed by server unexpectedly
conn.close()
retries -= 1
if retries > 0:
conn = http.client.HTTPSConnection(host, timeout=30)
else:
print(f"✗ {display_path}: {e}")
conn = process_single_download(
conn, host, url_path, local_path, display_path, headers
)
finally:
file_q.task_done()
finally:
Expand Down
Loading