MultimediaTechLab · Abdul-Mukit · Aug 15, 2024 · Aug 15, 2024 · Aug 16, 2024
diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
@@ -65,13 +65,14 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
             labels_path (str): Path to the directory containing label files.
 
         Returns:
-            list: A list of tuples, each containing the path to an image file and its associated segmentation as a tensor.
+            list: A list of tuples, each containing the path to an image file
+                and its associated segmentation as a tensor.
         """
         images_path = dataset_path / "images" / phase_name
         labels_path, data_type = locate_label_paths(dataset_path, phase_name)
         images_list = sorted([p.name for p in Path(images_path).iterdir() if p.is_file()])
         if data_type == "json":
-            annotations_index, image_info_dict = create_image_metadata(labels_path)
+            annotations_dict, image_info_dict = create_image_metadata(labels_path)
 
         data = []
         valid_inputs = 0
@@ -84,7 +85,7 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
                 image_info = image_info_dict.get(image_id, None)
                 if image_info is None:
                     continue
-                annotations = annotations_index.get(image_info["id"], [])
+                annotations = annotations_dict.get(image_id, [])
                 image_seg_annotations = scale_segmentation(annotations, image_info)
                 if not image_seg_annotations:
                     continue
@@ -99,9 +100,8 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
                 image_seg_annotations = []
 
             labels = self.load_valid_labels(image_id, image_seg_annotations)
-
-            img_path = images_path / image_name
-            data.append((img_path, labels))
+            image_path = images_path / image_name
+            data.append((image_path, labels))
             valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data

diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py
@@ -39,45 +39,63 @@ def locate_label_paths(dataset_path: Path, phase_name: Path) -> Tuple[Path, Path
 
 def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str, Dict]]:
     """
-    Create a dictionary containing image information and annotations indexed by image ID.
+    Create a dictionary containing image information and annotations
+    both indexed by image id. Image id is the file name without the extension.
+    It is not the same as the int image id saved in coco .json files.
 
     Args:
         labels_path (str): The path to the annotation json file.
 
     Returns:
-        - annotations_index: A dictionary where keys are image IDs and values are lists of annotations.
-        - image_info_dict: A dictionary where keys are image file names without extension and values are image information dictionaries.
+        A Tuple of annotations_dict and image_info_dict.
+        annotations_dict is a dictionary where keys are image ids and values
+        are lists of annotations.
+        image_info_dict is a dictionary where keys are image ids and
+        values are image information dictionaries.
     """
     with open(labels_path, "r") as file:
-        labels_data = json.load(file)
-        id_to_idx = discretize_categories(labels_data.get("categories", [])) if "categories" in labels_data else None
-        annotations_index = organize_annotations_by_image(labels_data, id_to_idx)  # check lookup is a good name?
-        image_info_dict = {Path(img["file_name"]).stem: img for img in labels_data["images"]}
-        return annotations_index, image_info_dict
-
-
-def organize_annotations_by_image(data: Dict[str, Any], id_to_idx: Optional[Dict[int, int]]):
+        json_data = json.load(file)
+        image_id_to_file_name_dict = {
+            img['id'] : Path(img["file_name"]).stem for img in json_data["images"]
+        }
+        # TODO: id_to_idx is unnecessary. `idx = id - 1` in coco as category_id starts from 1.
+        # what if we had 1M images? Unnecessary!
+        id_to_idx = discretize_categories(json_data.get("categories", [])) if "categories" in json_data else None
+        annotations_dict = map_annotations_to_image_names(json_data, id_to_idx, image_id_to_file_name_dict)  # check lookup is a good name?
+        image_info_dict = {Path(img["file_name"]).stem: img for img in json_data["images"]}
+        return annotations_dict, image_info_dict
+
+
+def map_annotations_to_image_names(
+        json_data: Dict[str, Any],
+        category_id_to_idx: Optional[Dict[int, int]],
+        image_id_to_image_name:dict[int, str]
+) -> dict[str, list[dict]]:
     """
-    Use image index to lookup every annotations
+    Returns a dict mapping image file names to a list of all corresponding annotations.
     Args:
-        data (Dict[str, Any]): A dictionary containing annotation data.
+        json_data: Data read from a COCO json file.
+        category_id_to_idx: For COCO dataset, a dict mapping from category_id
+            to (category_id - 1).  # TODO: deprecate?
+        image_id_to_image_name: Dict mapping the int COCO image id to the image file name without extension.
 
     Returns:
-        Dict[int, List[Dict[str, Any]]]: A dictionary where keys are image IDs and values are lists of annotations.
-        Annotations with "iscrowd" set to True are excluded from the index.
-
+        image_name_to_annotation_dict_list: A dictionary where keys are image names
+            and values are lists of annotation dictionaries.
+            Annotations with "iscrowd" set to True are excluded.
     """
-    annotation_lookup = {}
-    for anno in data["annotations"]:
-        if anno["iscrowd"]:
+    image_name_to_annotation_dict_list = {}
+    for annotation_dict in json_data["annotations"]:
+        if annotation_dict["iscrowd"]:
             continue
-        image_id = anno["image_id"]
-        if id_to_idx:
-            anno["category_id"] = id_to_idx[anno["category_id"]]
-        if image_id not in annotation_lookup:
-            annotation_lookup[image_id] = []
-        annotation_lookup[image_id].append(anno)
-    return annotation_lookup
+        image_id = annotation_dict["image_id"]
+        image_name = image_id_to_image_name[image_id]
+        if category_id_to_idx:
+            annotation_dict["category_id"] = category_id_to_idx[annotation_dict["category_id"]]
+        if image_name not in image_name_to_annotation_dict_list:
+            image_name_to_annotation_dict_list[image_name] = []
+        image_name_to_annotation_dict_list[image_name].append(annotation_dict)
+    return image_name_to_annotation_dict_list
 
 
 def scale_segmentation(