bdraco · aiolibsbot · May 17, 2026
diff --git a/src/index_503/index.py b/src/index_503/index.py
@@ -2,6 +2,7 @@
 import logging
 import os
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from operator import attrgetter
 from pathlib import Path
 from shutil import rmtree
@@ -90,11 +91,10 @@ def _atomic_replace_old_index(self, temp_dir_path: Path, target_path: Path) -> N
 
     def _make_index_at_temp_dir(self, temp_dir_path: Path) -> None:
         """Generate a simple repository of Python wheels in a temp dir."""
-        new_wheel_file_objects: list[WheelFile] = []
         projects: dict[str, list[WheelFile]] = defaultdict(list)
-        wheel_file_name_to_metadata_path: dict[str, Path] = {}
         all_wheel_files: set[str] = set()
         raw_cache = self.cache.cache
+        misses: list[tuple[Path, Path, Path]] = []
 
         for wheel_file in glob.glob(str(self.origin_path.joinpath("*.whl"))):
             wheel_path = Path(wheel_file)
@@ -108,16 +108,25 @@ def _make_index_at_temp_dir(self, temp_dir_path: Path) -> None:
                 wheel_file_obj := WheelFile.from_cache(wheel_cache, mtime, size)
             ):
                 os.link(self.target_path.joinpath(metadata_path.name), metadata_path)
-            elif wheel_file_obj := WheelFile.from_wheel(wheel_path, metadata_path):
-                wheel_file_name_to_metadata_path[wheel_file_name] = metadata_path
-                new_wheel_file_objects.append(wheel_file_obj)
-                raw_cache[wheel_file_name] = wheel_file_obj.as_dict()
+                projects[wheel_file_obj.canonical_name].append(wheel_file_obj)
+                os.link(wheel_path, target_file)
             else:
-                continue
-
-            canonical_name = wheel_file_obj.canonical_name
-            projects[canonical_name].append(wheel_file_obj)
-            os.link(wheel_path, target_file)
+                misses.append((wheel_path, target_file, metadata_path))
+
+        if misses:
+            # from_wheel is I/O- and hash-bound (both release the GIL); each
+            # task writes only to its own metadata_path, so threads are safe.
+            with ThreadPoolExecutor() as executor:
+                results = executor.map(
+                    lambda args: (args, WheelFile.from_wheel(args[0], args[2])),
+                    misses,
+                )
+                for (wheel_path, target_file, _), wheel_file_obj in results:
+                    if wheel_file_obj is None:
+                        continue
+                    raw_cache[wheel_path.name] = wheel_file_obj.as_dict()
+                    projects[wheel_file_obj.canonical_name].append(wheel_file_obj)
+                    os.link(wheel_path, target_file)
 
         self.cache.remove_stale_keys(all_wheel_files)
         self.generate_index_pages(temp_dir_path, projects)

diff --git a/src/index_503/util.py b/src/index_503/util.py
@@ -26,11 +26,20 @@ def get_mtime_and_size_from_path(path: Path) -> tuple[float, int]:
     return stat.st_mtime, stat.st_size
 
 
+_HASH_CHUNK_SIZE = 1024 * 1024
+
+
 def get_sha256_hash(filename: Path) -> str:
-    """Get SHA256 hash of a file."""
+    """Return the hexadecimal SHA256 digest of the file at *filename*.
+
+    The file is consumed in blocks of _HASH_CHUNK_SIZE bytes, so even a
+    very large wheel is never held in memory all at once.
+    """
+    digest = sha256()
     with filename.open("rb") as f:
-        bytes = f.read()  # read entire file as bytes
-        return sha256(bytes).hexdigest()
+        while block := f.read(_HASH_CHUNK_SIZE):
+            digest.update(block)
+    return digest.hexdigest()
 
 
 def load_json_file(filename: Path) -> dict[str, dict[str, Any]]: