From 8f2ecc2af730bb1c358940c396e376375f33042d Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Wed, 14 Aug 2024 03:44:18 +0000
Subject: [PATCH 01/13] [fix] automatically regenerate the cache when it
 doesn't match

---
 yolo/tools/data_loader.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 9ceb455f..bf31c37a 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -54,8 +54,33 @@ def load_data(self, dataset_path: Path, phase_name: str):
         else:
             data = torch.load(cache_path)
             logger.info("📦 Loaded {} cache", phase_name)
+
+        # Validate cache
+        if self.validate_cache(dataset_path, phase_name, data):
+            logger.info("✅ Cache validation successful")
+        else:
+            logger.warning("⚠️ Cache validation failed, regenerating")
+            data = self.filter_data(dataset_path, phase_name)
+            torch.save(data, cache_path)
+
         return data
 
+    def validate_cache(self, dataset_path: Path, phase_name: str, cached_data: list) -> bool:
+        """
+        Validates that the cached data is consistent with the current dataset by comparing complete file paths
+        """
+        images_path = dataset_path / "images" / phase_name
+        current_images = sorted([p.resolve() for p in images_path.iterdir() if p.is_file()])
+        cached_images = sorted([Path(item[0]).resolve() for item in cached_data])
+
+        # Check if the image file paths are completely consistent
+        if current_images != cached_images:
+            return False
+
+        # More validation steps could be added here, e.g. checking label file modification times
+
+        return True
+
     def filter_data(self, dataset_path: Path, phase_name: str) -> list:
         """
         Filters and collects dataset information by pairing images with their corresponding labels.

From d2c36b18c47c7cd2b60c3e386bf31666ebb571d5 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Wed, 14 Aug 2024 05:13:11 +0000
Subject: [PATCH 02/13] [fix] shorten the path of the cache file

---
 yolo/tools/data_loader.py | 25 +++++-------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index bf31c37a..3d470f2a 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -32,7 +32,8 @@ def __init__(self, data_cfg: DataConfig, dataset_cfg: DatasetConfig, phase: str
         transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
         self.transform = AugmentationComposer(transforms, self.image_size)
         self.transform.get_more_data = self.get_more_data
-        self.data = self.load_data(Path(dataset_cfg.path), phase_name)
+        self.data_root = Path(dataset_cfg.path)
+        self.data = self.load_data(self.data_root, phase_name)
 
     def load_data(self, dataset_path: Path, phase_name: str):
         """
@@ -54,9 +55,8 @@ def load_data(self, dataset_path: Path, phase_name: str):
         else:
             data = torch.load(cache_path)
             logger.info("📦 Loaded {} cache", phase_name)
-
         # Validate cache
-        if self.validate_cache(dataset_path, phase_name, data):
+        if data[0][0].parent == Path("images")/phase_name:
             logger.info("✅ Cache validation successful")
         else:
             logger.warning("⚠️ Cache validation failed, regenerating")
             data = self.filter_data(dataset_path, phase_name)
             torch.save(data, cache_path)
 
         return data
 
-    def validate_cache(self, dataset_path: Path, phase_name: str, cached_data: list) -> bool:
-        """
-        Validates that the cached data is consistent with the current dataset by comparing complete file paths
-        """
-        images_path = dataset_path / "images" / phase_name
-        current_images = sorted([p.resolve() for p in images_path.iterdir() if p.is_file()])
-        cached_images = sorted([Path(item[0]).resolve() for item in cached_data])
-
-        # Check if the image file paths are completely consistent
-        if current_images != cached_images:
-            return False
-
-        # More validation steps could be added here, e.g. checking label file modification times
-
-        return True
-
     def filter_data(self, dataset_path: Path, phase_name: str) -> list:
         """
         Filters and collects dataset information by pairing images with their corresponding labels.
@@ -125,7 +109,7 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
 
             labels = self.load_valid_labels(image_id, image_seg_annotations)
 
-            img_path = images_path / image_name
+            img_path = Path("images") / phase_name / image_name
             data.append((img_path, labels))
             valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
@@ -158,6 +142,7 @@ def load_valid_labels(self, label_path: str, seg_data_one_img: list) -> Union[Te
 
     def get_data(self, idx):
        img_path, bboxes = self.data[idx]
+        img_path = self.data_root / Path(img_path)
         img = Image.open(img_path).convert("RGB")
         return img, bboxes, img_path
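A note between patches 01 and 02: patch 02 trades the full path comparison above for a much cheaper parent-directory check. If stronger validation is ever wanted without re-listing every path on load, one option is to store a content signature next to the cached samples. The sketch below is illustrative only and not part of this series; compute_cache_signature is a hypothetical helper.

    import hashlib
    from pathlib import Path

    def compute_cache_signature(images_path: Path) -> str:
        """Hash the sorted image file names so the cache can be invalidated
        when files are added, removed, or renamed. Sketch only."""
        names = sorted(p.name for p in images_path.iterdir() if p.is_file())
        return hashlib.sha256("\n".join(names).encode()).hexdigest()

    # Usage idea: persist {"signature": compute_cache_signature(images_path), "data": data}
    # and regenerate whenever the stored signature no longer matches.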
From 86da903718367f5fe198cf9ab322c369ed564997 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Wed, 14 Aug 2024 05:13:43 +0000
Subject: [PATCH 03/13] add TRAIN branch to the deploy workflow trigger

---
 .github/workflows/deploy.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 3e265d82..25693192 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -2,7 +2,7 @@ name: Deploy Mode Validation & Inference
 
 on:
   push:
-    branches: [main]
+    branches: [main, TRAIN]
   pull_request:
     branches: [main]

From 9ffcc475a7691aee45a7292e09e8380909ce0af3 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Thu, 15 Aug 2024 09:59:47 +0000
Subject: [PATCH 04/13]
 1. align ModelValidator's constructor with ModelTrainer's
 2. add a log line for the optimizer
 3. build the image_id map during validation
 4. update the folder structure; support separate image_root and label_root
    per phase
 5. get_metadata now keys on the image file name instead of the image_id
 6. add a mode option ("detection" or "segmentation"), defaulting to
    "detection"
 7. 
fix the good_epoch --- yolo/config/dataset/mock.yaml | 6 +++-- yolo/lazy.py | 2 +- yolo/tools/data_loader.py | 45 +++++++++++++++++------------------ yolo/tools/solver.py | 32 +++++++++++++++---------- yolo/utils/dataset_utils.py | 27 +++++++++++---------- yolo/utils/model_utils.py | 5 ++-- 6 files changed, 64 insertions(+), 53 deletions(-) diff --git a/yolo/config/dataset/mock.yaml b/yolo/config/dataset/mock.yaml index c7d58a10..fc73643c 100644 --- a/yolo/config/dataset/mock.yaml +++ b/yolo/config/dataset/mock.yaml @@ -1,6 +1,8 @@ path: tests/data -train: train -validation: val +image_train: images/train +label_train: annotations/instances_train.json +image_validation: images/val +label_validation: annotations/instances_val.json class_num: 80 class_list: ['Person', 'Bicycle', 'Car', 'Motorcycle', 'Airplane', 'Bus', 'Train', 'Truck', 'Boat', 'Traffic light', 'Fire hydrant', 'Stop sign', 'Parking meter', 'Bench', 'Bird', 'Cat', 'Dog', 'Horse', 'Sheep', 'Cow', 'Elephant', 'Bear', 'Zebra', 'Giraffe', 'Backpack', 'Umbrella', 'Handbag', 'Tie', 'Suitcase', 'Frisbee', 'Skis', 'Snowboard', 'Sports ball', 'Kite', 'Baseball bat', 'Baseball glove', 'Skateboard', 'Surfboard', 'Tennis racket', 'Bottle', 'Wine glass', 'Cup', 'Fork', 'Knife', 'Spoon', 'Bowl', 'Banana', 'Apple', 'Sandwich', 'Orange', 'Broccoli', 'Carrot', 'Hot dog', 'Pizza', 'Donut', 'Cake', 'Chair', 'Couch', 'Potted plant', 'Bed', 'Dining table', 'Toilet', 'Tv', 'Laptop', 'Mouse', 'Remote', 'Keyboard', 'Cell phone', 'Microwave', 'Oven', 'Toaster', 'Sink', 'Refrigerator', 'Book', 'Clock', 'Vase', 'Scissors', 'Teddy bear', 'Hair drier', 'Toothbrush'] diff --git a/yolo/lazy.py b/yolo/lazy.py index 1bc5577e..24b45701 100644 --- a/yolo/lazy.py +++ b/yolo/lazy.py @@ -32,7 +32,7 @@ def main(cfg: Config): if cfg.task.task == "train": solver = ModelTrainer(cfg, model, converter, progress, device, use_ddp) if cfg.task.task == "validation": - solver = ModelValidator(cfg.task, cfg.dataset, model, converter, progress, device) + solver = ModelValidator(cfg, model, converter, progress, device) if cfg.task.task == "inference": solver = ModelTester(cfg, model, converter, progress, device) progress.start() diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py index 3d470f2a..05125106 100644 --- a/yolo/tools/data_loader.py +++ b/yolo/tools/data_loader.py @@ -24,7 +24,7 @@ class YoloDataset(Dataset): - def __init__(self, data_cfg: DataConfig, dataset_cfg: DatasetConfig, phase: str = "train2017"): + def __init__(self, data_cfg: DataConfig, dataset_cfg: DatasetConfig, phase: str = "train"): augment_cfg = data_cfg.data_augment self.image_size = data_cfg.image_size phase_name = dataset_cfg.get(phase, phase) @@ -32,10 +32,11 @@ def __init__(self, data_cfg: DataConfig, dataset_cfg: DatasetConfig, phase: str transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()] self.transform = AugmentationComposer(transforms, self.image_size) self.transform.get_more_data = self.get_more_data - self.data_root = Path(dataset_cfg.path) - self.data = self.load_data(self.data_root, phase_name) + self.images_path = Path(dataset_cfg.path) / getattr(dataset_cfg, "image_" + phase) + self.labels_path = Path(dataset_cfg.path) / getattr(dataset_cfg, "label_" + phase) + self.data = self.load_data(self.images_path, self.labels_path, phase_name) - def load_data(self, dataset_path: Path, phase_name: str): + def load_data(self, images_path: Path, labels_path: Path, phase_name: str): """ Loads data from a cache or generates a new cache for a specific dataset phase. 
@@ -46,26 +47,26 @@ def load_data(self, dataset_path: Path, phase_name: str):
         Returns:
             dict: The loaded data from the cache for the specified phase.
         """
-        cache_path = dataset_path / f"{phase_name}.cache"
+        cache_path = images_path / f"{phase_name}.cache"
 
         if not cache_path.exists():
             logger.info("🏭 Generating {} cache", phase_name)
-            data = self.filter_data(dataset_path, phase_name)
+            data = self.filter_data(images_path, labels_path, phase_name)
             torch.save(data, cache_path)
         else:
             data = torch.load(cache_path)
             logger.info("📦 Loaded {} cache", phase_name)
-        # Validate cache
-        if data[0][0].parent == Path("images")/phase_name:
-            logger.info("✅ Cache validation successful")
-        else:
-            logger.warning("⚠️ Cache validation failed, regenerating")
-            data = self.filter_data(dataset_path, phase_name)
-            torch.save(data, cache_path)
+        # TODO: re-enable cache validation
+        # if data[0][0].parent == Path("images")/phase_name:
+        #     logger.info("✅ Cache validation successful")
+        # else:
+        #     logger.warning("⚠️ Cache validation failed, regenerating")
+        #     data = self.filter_data(images_path, labels_path, phase_name)
+        #     torch.save(data, cache_path)
 
         return data
 
-    def filter_data(self, dataset_path: Path, phase_name: str) -> list:
+    def filter_data(self, images_path: Path, labels_path: Path, phase_name: str) -> list:
         """
         Filters and collects dataset information by pairing images with their corresponding labels.
 
@@ -76,8 +77,7 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
         Returns:
             list: A list of tuples, each containing the path to an image file and its associated segmentation as a tensor.
         """
-        images_path = dataset_path / "images" / phase_name
-        labels_path, data_type = locate_label_paths(dataset_path, phase_name)
+        labels_path, data_type = locate_label_paths(labels_path, phase_name)
         images_list = sorted([p.name for p in Path(images_path).iterdir() if p.is_file()])
         if data_type == "json":
             annotations_index, image_info_dict = create_image_metadata(labels_path)
@@ -90,11 +90,11 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
             image_id = Path(image_name).stem
 
             if data_type == "json":
-                image_info = image_info_dict.get(image_id, None)
+                image_info = image_info_dict[image_name]  # .get(image_id, None)
                 if image_info is None:
                     continue
                 annotations = annotations_index.get(image_info["id"], [])
-                image_seg_annotations = scale_segmentation(annotations, image_info)
+                image_seg_annotations = scale_segmentation(annotations, image_info)  # coco2yolo
                 if not image_seg_annotations:
                     continue
 
@@ -106,11 +106,10 @@ def filter_data(self, dataset_path: Path, phase_name: str) -> list:
                     image_seg_annotations = [list(map(float, line.strip().split())) for line in file]
             else:
                 image_seg_annotations = []
+            # TODO: correct the box and log the image file
+            labels = self.load_valid_labels(images_path / image_name, image_seg_annotations)
 
-            labels = self.load_valid_labels(image_id, image_seg_annotations)
-
-            img_path = Path("images") / phase_name / image_name
-            data.append((img_path, labels))
+            data.append((image_name, labels))
             valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data
@@ -142,7 +141,7 @@ def load_valid_labels(self, label_path: str, seg_data_one_img: list) -> Union[Te
 
     def get_data(self, idx):
         img_path, bboxes = self.data[idx]
-        img_path = self.data_root / Path(img_path)
+        img_path = self.images_path / img_path
         img = Image.open(img_path).convert("RGB")
         return img, bboxes, img_path
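A note on the configuration change above: the new image_/label_ keys in mock.yaml are resolved per phase via getattr in the loader. A standalone sketch of that resolution, with a plain dict standing in for the real config object (resolve_phase_paths is a hypothetical helper):

    from pathlib import Path

    # Assumed shape of the dataset config after the mock.yaml change above.
    dataset_cfg = {
        "path": "tests/data",
        "image_train": "images/train",
        "label_train": "annotations/instances_train.json",
    }

    def resolve_phase_paths(cfg: dict, phase: str):
        """Build the image and label roots for a phase from the
        image_<phase> / label_<phase> keys, mirroring the getattr calls above."""
        root = Path(cfg["path"])
        return root / cfg[f"image_{phase}"], root / cfg[f"label_{phase}"]

    print(resolve_phase_paths(dataset_cfg, "train"))
    # e.g. (PosixPath('tests/data/images/train'), PosixPath('tests/data/annotations/instances_train.json'))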
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 69953e39..34753ad7 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -21,7 +21,7 @@
 from yolo.tools.drawer import draw_bboxes, draw_model
 from yolo.tools.loss_functions import create_loss_function
 from yolo.utils.bounding_box_utils import Vec2Box, calculate_map
-from yolo.utils.dataset_utils import locate_label_paths
+from yolo.utils.dataset_utils import locate_label_paths, create_image_metadata
 from yolo.utils.logging_utils import ProgressLogger, log_model_structure
 from yolo.utils.model_utils import (
     ExponentialMovingAverage,
@@ -58,7 +58,7 @@ def __init__(self, cfg: Config, model: YOLO, vec2box: Vec2Box, progress: Progres
         self.validation_dataloader = create_dataloader(
             cfg.task.validation.data, cfg.dataset, cfg.task.validation.task, use_ddp
         )
-        self.validator = ModelValidator(cfg.task, cfg.dataset, model, vec2box, progress, device)
+        self.validator = ModelValidator(cfg, model, vec2box, progress, device)
 
         if getattr(train_cfg.ema, "enabled", False):
             self.ema = ExponentialMovingAverage(model, decay=train_cfg.ema.decay)
@@ -126,11 +126,11 @@ def save_checkpoint(self, epoch_idx: int, file_name: Optional[str] = None):
         torch.save(checkpoint, file_path)
 
     def good_epoch(self, mAPs: Dict[str, Tensor]) -> bool:
-        save_flag = True
+        save_flag = False
         for mAP_key, mAP_val in mAPs.items():
+            if not self.mAPs_dict[mAP_key] or mAP_val > max(self.mAPs_dict[mAP_key]):
+                save_flag = True
             self.mAPs_dict[mAP_key].append(mAP_val)
-            if mAP_val < max(self.mAPs_dict[mAP_key]):
-                save_flag = False
         return save_flag
 
     def solve(self, dataloader: DataLoader):
@@ -212,8 +212,7 @@ def solve(self, dataloader: StreamDataLoader):
 class ModelValidator:
     def __init__(
         self,
-        validation_cfg: ValidationConfig,
-        dataset_cfg: DatasetConfig,
+        cfg: Config,
         model: YOLO,
         vec2box: Vec2Box,
         progress: ProgressLogger,
@@ -222,13 +221,14 @@ def __init__(
         self.model = model
         self.device = device
         self.progress = progress
-
-        self.post_proccess = PostProccess(vec2box, validation_cfg.nms)
+
+        self.post_proccess = PostProccess(vec2box, cfg.task.validation.nms)
         self.json_path = self.progress.save_path / "predict.json"
 
         with contextlib.redirect_stdout(io.StringIO()):
             # TODO: load with config file
-            json_path, _ = locate_label_paths(Path(dataset_cfg.path), dataset_cfg.get("validation", "val"))
+            self.labels_path = Path(cfg.dataset.path) / getattr(cfg.dataset, "label_" + cfg.task.validation.task)
+            json_path, _ = locate_label_paths(self.labels_path, cfg.dataset.get("validation", "validation"))
             if json_path:
                 self.coco_gt = COCO(json_path)
 
@@ -236,6 +236,7 @@ def solve(self, dataloader, epoch_idx=1):
         # logger.info("🧪 Start Validation!")
         self.model.eval()
         predict_json, mAPs = [], defaultdict(list)
+        _, image_info_dict = create_image_metadata(self.labels_path)
         self.progress.start_one_epoch(len(dataloader), task="Validate")
         for batch_size, images, targets, rev_tensor, img_paths in dataloader:
             images, targets, rev_tensor = images.to(self.device), targets.to(self.device), rev_tensor.to(self.device)
@@ -250,7 +251,7 @@ def solve(self, dataloader, epoch_idx=1):
             avg_mAPs = {key: torch.mean(torch.stack(val)) for key, val in mAPs.items()}
             self.progress.one_batch(avg_mAPs)
 
-            predict_json.extend(predicts_to_json(img_paths, predicts, rev_tensor))
+            predict_json.extend(predicts_to_json(img_paths, image_info_dict, predicts, rev_tensor))
         self.progress.finish_one_epoch(avg_mAPs, epoch_idx=epoch_idx)
         self.progress.visualize_image(images, targets, predicts, epoch_idx=epoch_idx)
 
@@ -259,9 +260,16 @@ def solve(self, dataloader, epoch_idx=1):
             if self.progress.local_rank != 0:
                 return
             json.dump(predict_json, f)
-        if hasattr(self, "coco_gt"):
+
+        # yolo-format datasets are skipped here
+        if predict_json and hasattr(self, "coco_gt"):
             self.progress.start_pycocotools()
             result = calculate_ap(self.coco_gt, predict_json)
             self.progress.finish_pycocotools(result, epoch_idx)
+        else:
+            if not predict_json:
+                logger.warning("⚠️ No predictions available for evaluation.")
+            if not hasattr(self, "coco_gt"):
+                logger.warning("⚠️ COCO ground truth not found. Please check dataset configuration.")
 
         return avg_mAPs
diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py
index a6c6e1fd..abd740bd 100644
--- a/yolo/utils/dataset_utils.py
+++ b/yolo/utils/dataset_utils.py
@@ -10,7 +10,7 @@
 from yolo.tools.data_conversion import discretize_categories
 
 
-def locate_label_paths(dataset_path: Path, phase_name: Path) -> Tuple[Path, Path]:
+def locate_label_paths(label_path: Path, phase_name: Path) -> Tuple[Path, Path]:
     """
     Find the path to label files for a specified dataset and phase (e.g. training).
 
@@ -21,17 +21,14 @@
     Returns:
         Tuple[Path, Path]: A tuple containing the path to the labels file and the file format ("json" or "txt").
     """
-    json_labels_path = dataset_path / "annotations" / f"instances_{phase_name}.json"
-    txt_labels_path = dataset_path / "labels" / phase_name
+    if label_path.is_file():
+        return label_path, "json"
 
-    if json_labels_path.is_file():
-        return json_labels_path, "json"
-
-    elif txt_labels_path.is_dir():
-        txt_files = [f for f in os.listdir(txt_labels_path) if f.endswith(".txt")]
+    elif label_path.is_dir():
+        txt_files = [f for f in os.listdir(label_path) if f.endswith(".txt")]
         if txt_files:
-            return txt_labels_path, "txt"
+            return label_path, "txt"
 
     logger.warning("No labels found in the specified dataset path and phase name.")
     return [], None
@@ -52,7 +49,7 @@ def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str,
         labels_data = json.load(file)
         id_to_idx = discretize_categories(labels_data.get("categories", [])) if "categories" in labels_data else None
         annotations_index = organize_annotations_by_image(labels_data, id_to_idx)  # check lookup is a good name?
-        image_info_dict = {Path(img["file_name"]).stem: img for img in labels_data["images"]}
+        image_info_dict = {img["file_name"]: img for img in labels_data["images"]}
         return annotations_index, image_info_dict
 
 
@@ -81,7 +78,8 @@ def organize_annotations_by_image(data: Dict[str, Any], id_to_idx: Optional[Dict
 
 
 def scale_segmentation(
-    annotations: List[Dict[str, Any]], image_dimensions: Dict[str, int]
+    annotations: List[Dict[str, Any]], image_dimensions: Dict[str, int],
+    mode: str = "detection"
 ) -> Optional[List[List[float]]]:
     """
     Scale the segmentation data based on image dimensions and return a list of scaled segmentation data.
@@ -100,10 +98,13 @@ def scale_segmentation(
     h, w = image_dimensions["height"], image_dimensions["width"]
     for anno in annotations:
         category_id = anno["category_id"]
-        if "segmentation" in anno:
+        if "segmentation" in anno and mode == "segmentation":
             seg_list = [item for sublist in anno["segmentation"] for item in sublist]
-        elif "bbox" in anno:
+        elif "bbox" in anno and mode == "detection":
             seg_list = anno["bbox"]
+        else:
+            # invalid annotation
+            return []
 
         scaled_seg_data = (
             np.array(seg_list).reshape(-1, 2) / [w, h]
         ).tolist()  # group the flat list into (x, y) pairs scaled by image width and height
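The arithmetic in scale_segmentation is easy to lose in the diff: a flat coordinate list (a bbox in detection mode, polygon points in segmentation mode) is regrouped into (x, y) pairs and divided by the image size. The same step in isolation, with made-up numbers (a sketch, not repository code):

    import numpy as np

    # A COCO-style bbox [x, y, w, h] on a 640x480 image.
    seg_list = [64.0, 48.0, 320.0, 240.0]
    w, h = 640, 480

    # Group the flat list into (x, y) pairs and scale by image width/height,
    # mirroring `np.array(seg_list).reshape(-1, 2) / [w, h]` above.
    scaled = (np.array(seg_list).reshape(-1, 2) / [w, h]).tolist()
    print(scaled)  # [[0.1, 0.1], [0.5, 0.5]]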
diff --git a/yolo/utils/model_utils.py b/yolo/utils/model_utils.py
index c35b6009..29722f09 100644
--- a/yolo/utils/model_utils.py
+++ b/yolo/utils/model_utils.py
@@ -73,6 +73,7 @@ def next_batch(self):
     optimizer_class.next_epoch = next_epoch
     optimizer = optimizer_class(model_parameters, **optim_cfg.args)
     optimizer.max_lr = [0.1, 0, 0]
+    logger.info(f"✅ Optimizer {optim_cfg.type} initialized successfully")
     return optimizer
 
 
@@ -160,7 +161,7 @@ def collect_prediction(predict_json: List, local_rank: int) -> List:
     return predict_json
 
 
-def predicts_to_json(img_paths, predicts, rev_tensor):
+def predicts_to_json(img_paths, image_id_map, predicts, rev_tensor):
     """
     TODO: function document
     turn a batch of imagepath and predicts(n x 6 for each image) to a List of diction(Detection output)
@@ -172,7 +173,7 @@ def predicts_to_json(img_paths, predicts, rev_tensor):
         bboxes[:, 1:5] = transform_bbox(bboxes[:, 1:5], "xyxy -> xywh")
         for cls, *pos, conf in bboxes:
             bbox = {
-                "image_id": int(Path(img_path).stem),
+                "image_id": image_id_map[img_path.name]["id"],
                 "category_id": IDX_TO_ID[int(cls)],
                 "bbox": [float(p) for p in pos],
                 "score": float(conf),

From 6ee806b61e840ad7b23befd1cea0fde1c3b39d29 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Fri, 16 Aug 2024 01:50:34 +0000
Subject: [PATCH 05/13] fix how image_name is looked up

---
 yolo/tools/data_loader.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 05125106..db26ad23 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -89,8 +89,9 @@ def filter_data(self, images_path: Path, labels_path: Path, phase_name: str) ->
                 continue
             image_id = Path(image_name).stem
 
-            if data_type == "json":
-                image_info = image_info_dict[image_name]  # .get(image_id, None)
+            if data_type == "json":
+                image_info = image_info_dict.get(image_name, None)
+                # TODO: negative samples could also be loaded
                 if image_info is None:
                     continue
                 annotations = annotations_index.get(image_info["id"], [])

From 8ab6e1a8b215ed2c1a2b93cb899ea96aa2e62881 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Fri, 16 Aug 2024 02:11:22 +0000
Subject: [PATCH 06/13] Use the folder name as the cache name

---
 yolo/tools/data_loader.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index db26ad23..e9f38973 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -41,13 +41,14 @@ def load_data(self, images_path: Path, labels_path: Path, phase_name: str):
         Loads data from a cache or generates a new cache for a specific dataset phase.
 
         Parameters:
-            dataset_path (Path): The root path to the dataset directory.
+            images_path (Path): The root path to the images directory.
+            labels_path (Path): The root path to the labels directory.
             phase_name (str): The specific phase of the dataset (e.g., 'train', 'test') to load or generate data for.
 
         Returns:
             dict: The loaded data from the cache for the specified phase.
         """
-        cache_path = images_path / f"{phase_name}.cache"
+        cache_path = images_path.with_suffix(".cache")
From 0173a1e52de78e55cf33e28969aa0fc913abb0e1 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Fri, 16 Aug 2024 09:16:53 +0000
Subject: [PATCH 07/13] use relative paths to distinguish files; merge COCO
 objects, duplicate data is not merged

---
 yolo/tools/data_loader.py  | 31 ++++++++++++++++++---
 yolo/tools/solver.py       | 43 ++++++++++++++++++++++-------
 yolo/utils/model_utils.py  |  2 +-
 yolo/utils/solver_utils.py | 55 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 117 insertions(+), 14 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index e9f38973..53db5d83 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -32,9 +32,32 @@ def __init__(self, data_cfg: DataConfig, dataset_cfg: DatasetConfig, phase: str
         transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
         self.transform = AugmentationComposer(transforms, self.image_size)
         self.transform.get_more_data = self.get_more_data
-        self.images_path = Path(dataset_cfg.path) / getattr(dataset_cfg, "image_" + phase)
-        self.labels_path = Path(dataset_cfg.path) / getattr(dataset_cfg, "label_" + phase)
-        self.data = self.load_data(self.images_path, self.labels_path, phase_name)
+
+        self.get_dataset_path(cfg=dataset_cfg, phase=phase)
+
+        self.data = []
+        for images_path, labels_path in zip(self.images_paths, self.labels_paths):
+            datas = self.load_data(images_path, labels_path, phase_name)
+            datas = [(images_path / data[0], *data[1:]) for data in datas]
+            self.data.extend(datas)
+
+    def get_dataset_path(self, cfg: DatasetConfig, phase: str = "train"):
+        # dataset source
+        images_paths = getattr(cfg, "image_" + phase)
+        if isinstance(images_paths, str):
+            images_paths = [images_paths]
+        elif isinstance(images_paths, tuple):
+            images_paths = list(images_paths)
+        self.images_paths = [Path(cfg.path) / images_path for images_path in images_paths]
+
+        labels_paths = getattr(cfg, "label_" + phase)
+        if isinstance(labels_paths, str):
+            labels_paths = [labels_paths]
+        elif isinstance(labels_paths, tuple):
+            labels_paths = list(labels_paths)
+        self.labels_paths = [Path(cfg.path) / labels_path for labels_path in labels_paths]
+
+        assert len(self.images_paths) == len(self.labels_paths)
 
     def load_data(self, images_path: Path, labels_path: Path, phase_name: str):
         """
@@ -143,7 +166,7 @@ def load_valid_labels(self, label_path: str, seg_data_one_img: list) -> Union[Te
 
     def get_data(self, idx):
         img_path, bboxes = self.data[idx]
-        img_path = self.images_path / img_path
+        # img_path = self.images_path / img_path
         img = Image.open(img_path).convert("RGB")
         return img, bboxes, img_path
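get_dataset_path above accepts either a single string or a tuple of dataset roots. The normalization it performs can be read as one small helper; a sketch (as_path_list is hypothetical, not in the patch):

    from pathlib import Path
    from typing import List, Tuple, Union

    def as_path_list(root: Path, entry: Union[str, Tuple[str, ...], List[str]]) -> List[Path]:
        """Normalize a config entry that may be a single string or a
        tuple/list of strings into a list of dataset roots."""
        if isinstance(entry, str):
            entry = [entry]
        return [root / item for item in entry]

    print(as_path_list(Path("data"), "images/train"))
    print(as_path_list(Path("data"), ("images/train", "images/extra")))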
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 34753ad7..a581bb51 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -31,7 +31,7 @@
     create_scheduler,
     predicts_to_json,
 )
-from yolo.utils.solver_utils import calculate_ap
+from yolo.utils.solver_utils import calculate_ap, merge_coco_objects
 
 
 class ModelTrainer:
@@ -227,18 +227,43 @@ def __init__(
 
         with contextlib.redirect_stdout(io.StringIO()):
             # TODO: load with config file
-            self.labels_path = Path(cfg.dataset.path) / getattr(cfg.dataset, "label_" + cfg.task.validation.task)
-            json_path, _ = locate_label_paths(self.labels_path, cfg.dataset.get("validation", "validation"))
-            if json_path:
-                self.coco_gt = COCO(json_path)
+            labels_paths = getattr(cfg.dataset, "label_" + cfg.task.validation.task)
+
+            if isinstance(labels_paths, str):
+                labels_paths = [labels_paths]
+            elif isinstance(labels_paths, tuple):
+                labels_paths = list(labels_paths)
+            self.labels_paths = [Path(cfg.dataset.path) / labels_path for labels_path in labels_paths]
+
+            images_paths = getattr(cfg.dataset, "image_" + cfg.task.validation.task)
+            if isinstance(images_paths, str):
+                images_paths = [images_paths]
+            elif isinstance(images_paths, tuple):
+                images_paths = list(images_paths)
+            self.images_paths = [Path(cfg.dataset.path) / images_path for images_path in images_paths]
+
+
+            json_paths = [locate_label_paths(labels_path, cfg.dataset.get("validation", "validation")) \
+                            for labels_path in self.labels_paths]
+
+            # merge the COCO objects; duplicate data is not merged
+            coco_objects = [COCO(json_path[0]) for json_path in json_paths if json_path[0]]
+            self.coco_gt = merge_coco_objects(coco_objects)
 
     def solve(self, dataloader, epoch_idx=1):
         # logger.info("🧪 Start Validation!")
         self.model.eval()
         predict_json, mAPs = [], defaultdict(list)
-        _, image_info_dict = create_image_metadata(self.labels_path)
+        # only save the unique path
+        image_info_dicts = {}
+        for images_path, labels_path in zip(self.images_paths, self.labels_paths):
+            _, image_info_dict = create_image_metadata(labels_path)
+            modified_dict = {f"{images_path/key}": value for key, value in image_info_dict.items()}
+            image_info_dicts.update(modified_dict)
+
         self.progress.start_one_epoch(len(dataloader), task="Validate")
         for batch_size, images, targets, rev_tensor, img_paths in dataloader:
+            images_path = dataloader.dataset.images_paths
             images, targets, rev_tensor = images.to(self.device), targets.to(self.device), rev_tensor.to(self.device)
             with torch.no_grad():
                 predicts = self.model(images)
@@ -250,7 +276,7 @@ def solve(self, dataloader, epoch_idx=1):
             avg_mAPs = {key: torch.mean(torch.stack(val)) for key, val in mAPs.items()}
             self.progress.one_batch(avg_mAPs)
 
-            predict_json.extend(predicts_to_json(img_paths, image_info_dict, predicts, rev_tensor))
+            predict_json.extend(predicts_to_json(img_paths, image_info_dicts, predicts, rev_tensor))
         self.progress.finish_one_epoch(avg_mAPs, epoch_idx=epoch_idx)
         self.progress.visualize_image(images, targets, predicts, epoch_idx=epoch_idx)
 
@@ -261,8 +286,8 @@ def solve(self, dataloader, epoch_idx=1):
             return
         json.dump(predict_json, f)
 
-        # yolo-format datasets are skipped here
-        if predict_json and hasattr(self, "coco_gt"):
+        # skipped for yolo-format datasets or when there are no results
+        if predict_json and len(self.coco_gt)>0:
             self.progress.start_pycocotools()
             result = calculate_ap(self.coco_gt, predict_json)
             self.progress.finish_pycocotools(result, epoch_idx)
diff --git a/yolo/utils/model_utils.py b/yolo/utils/model_utils.py
index 29722f09..990de5e8 100644
--- a/yolo/utils/model_utils.py
+++ b/yolo/utils/model_utils.py
@@ -173,7 +173,7 @@ def predicts_to_json(img_paths, image_id_map, predicts, rev_tensor):
         bboxes[:, 1:5] = transform_bbox(bboxes[:, 1:5], "xyxy -> xywh")
         for cls, *pos, conf in bboxes:
             bbox = {
-                "image_id": image_id_map[img_path.name]["id"],
+                "image_id": image_id_map[str(img_path)]["id"],
                 "category_id": IDX_TO_ID[int(cls)],
                 "bbox": [float(p) for p in pos],
                 "score": float(conf),
diff --git a/yolo/utils/solver_utils.py b/yolo/utils/solver_utils.py
index b04efcfd..c94d8337 100644
--- a/yolo/utils/solver_utils.py
+++ b/yolo/utils/solver_utils.py
@@ -1,6 +1,7 @@
 import contextlib
 import io
 
+import copy
 import numpy as np
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
@@ -46,3 +47,57 @@ def make_ap_table(score, past_result=[], last_score=None, epoch=-1):
         ap_table.add_row(f"{epoch: 3d}", ap_name, f"{ap_color}{ap_value:.2f}", ar_name, f"{ar_color}{ar_value:.2f}")
 
     return ap_table, this_ap
+
+
+def merge_coco_objects(coco_list):
+    """
+    Merge multiple COCO objects into a single one.
+
+    Args:
+        coco_list (List[COCO]): A list of COCO objects to merge.
+
+    Returns:
+        COCO: The merged COCO object.
+    """
+    if not coco_list:
+        return None  # Return None if the list is empty
+
+    if len(coco_list) == 1:
+        return coco_list[0]  # If there is only one object, return it directly
+
+    # Start with the first COCO object
+    merged_coco = COCO()
+    merged_coco.dataset = copy.deepcopy(coco_list[0].dataset)
+
+    for coco in coco_list[1:]:
+        # Skip this object if its data is identical to the already merged data
+        if coco.dataset == merged_coco.dataset:
+            continue
+
+        # Merge images
+        max_img_id = max(merged_coco.imgs.keys()) if merged_coco.imgs else 0
+        for img_id, img_info in coco.imgs.items():
+            if img_info not in merged_coco.dataset['images']:
+                new_img_id = img_id + max_img_id
+                merged_coco.dataset['images'].append({**img_info, 'id': new_img_id})
+
+        # Merge categories
+        max_cat_id = max(merged_coco.cats.keys()) if merged_coco.cats else 0
+        for cat_id, cat_info in coco.cats.items():
+            if cat_info not in merged_coco.dataset['categories']:
+                new_cat_id = cat_id + max_cat_id
+                merged_coco.dataset['categories'].append({**cat_info, 'id': new_cat_id})
+
+        # Merge annotations
+        max_ann_id = max(ann['id'] for ann in merged_coco.dataset['annotations']) if merged_coco.dataset['annotations'] else 0
+        for ann in coco.dataset['annotations']:
+            if ann not in merged_coco.dataset['annotations']:
+                new_ann = copy.deepcopy(ann)
+                new_ann['id'] = ann['id'] + max_ann_id
+                new_ann['image_id'] = ann['image_id'] + max_img_id
+                new_ann['category_id'] = ann['category_id'] + max_cat_id
+                merged_coco.dataset['annotations'].append(new_ann)
+
+    # Create index
+    merged_coco.createIndex()
+
+    return merged_coco
\ No newline at end of file
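merge_coco_objects above re-indexes images, categories, and annotations with id offsets so several ground-truth files can be evaluated as one. The core idea in a compact, standalone form, operating on plain COCO-style dicts with deliberately simplified duplicate handling (a sketch, not the repository implementation):

    import copy

    def merge_coco_dicts(datasets):
        """Merge plain COCO-style dicts by offsetting image and annotation ids.
        Sketch only: assumes all datasets share one category list."""
        merged = copy.deepcopy(datasets[0])
        for ds in datasets[1:]:
            img_offset = max((img["id"] for img in merged["images"]), default=0)
            ann_offset = max((ann["id"] for ann in merged["annotations"]), default=0)
            for img in ds["images"]:
                merged["images"].append({**img, "id": img["id"] + img_offset})
            for ann in ds["annotations"]:
                merged["annotations"].append(
                    {**ann, "id": ann["id"] + ann_offset, "image_id": ann["image_id"] + img_offset}
                )
        return merged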
From 295fcde9be78397fdf88ae6796f761fe085bf04f Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Fri, 16 Aug 2024 09:40:51 +0000
Subject: [PATCH 08/13] fix bugs in auto_download and COCO evaluation

---
 yolo/tools/data_loader.py | 2 +-
 yolo/tools/solver.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 53db5d83..b7d6912e 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -234,7 +234,7 @@ def create_dataloader(data_cfg: DataConfig, dataset_cfg: DatasetConfig, task: st
     if task == "inference":
         return StreamDataLoader(data_cfg)
 
-    if dataset_cfg.auto_download:
+    if dataset_cfg.get("auto_download", None):
         prepare_dataset(dataset_cfg, task)
 
     return YoloDataLoader(data_cfg, dataset_cfg, task, use_ddp)
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index a581bb51..28c596be 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -287,7 +287,7 @@ def solve(self, dataloader, epoch_idx=1):
         json.dump(predict_json, f)
 
         # skipped for yolo-format datasets or when there are no results
-        if predict_json and len(self.coco_gt)>0:
+        if predict_json and self.coco_gt:
             self.progress.start_pycocotools()
             result = calculate_ap(self.coco_gt, predict_json)
             self.progress.finish_pycocotools(result, epoch_idx)
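Patch 09 below switches the txt branch to Path.open. For context, each line of a YOLO-format label file is `class cx cy w h` with coordinates normalized to [0, 1]; a minimal parser in the same spirit (a sketch; read_yolo_labels is hypothetical):

    from pathlib import Path
    from typing import List

    def read_yolo_labels(label_path: Path) -> List[List[float]]:
        """Parse a YOLO txt label file: one `class cx cy w h` row per object,
        all values except the leading class index normalized to [0, 1]."""
        if not label_path.is_file():
            return []
        with label_path.open("r") as f:
            return [list(map(float, line.split())) for line in f if line.strip()]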
From a35e6037dc28b9d7167560ff9b4f3005a67e2c3f Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Fri, 16 Aug 2024 10:34:22 +0000
Subject: [PATCH 09/13] fix: YOLO-format dataloader

---
 yolo/tools/data_loader.py | 7 +++----
 yolo/tools/solver.py      | 5 +++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index b7d6912e..9d1726c3 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -111,7 +111,6 @@ def filter_data(self, images_path: Path, labels_path: Path, phase_name: str) ->
         for image_name in track(images_list, description="Filtering data"):
             if not image_name.lower().endswith((".jpg", ".jpeg", ".png")):
                 continue
-            image_id = Path(image_name).stem
 
             if data_type == "json":
                 image_info = image_info_dict.get(image_name, None)
@@ -124,11 +123,11 @@ def filter_data(self, images_path: Path, labels_path: Path, phase_name: str) ->
                     continue
 
             elif data_type == "txt":
-                label_path = labels_path / f"{image_id}.txt"
+                label_path = labels_path / Path(image_name).with_suffix('.txt')
                 if not label_path.is_file():
                     continue
-                with open(label_path, "r") as file:
-                    image_seg_annotations = [list(map(float, line.strip().split())) for line in file]
+                with label_path.open("r") as f:
+                    image_seg_annotations = [list(map(float, line.strip().split())) for line in f]
             else:
                 image_seg_annotations = []
             # TODO: correct the box and log the image file
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 28c596be..30bfccb7 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -242,12 +242,12 @@ def __init__(
                 images_paths = list(images_paths)
             self.images_paths = [Path(cfg.dataset.path) / images_path for images_path in images_paths]
 
-
+            # get the gt label
             json_paths = [locate_label_paths(labels_path, cfg.dataset.get("validation", "validation")) \
                             for labels_path in self.labels_paths]
 
             # merge the COCO objects; duplicate data is not merged
-            coco_objects = [COCO(json_path[0]) for json_path in json_paths if json_path[0]]
+            coco_objects = [COCO(json_path[0]) for json_path in json_paths if json_path[0] and json_path[0]=="json"]
             self.coco_gt = merge_coco_objects(coco_objects)
 
     def solve(self, dataloader, epoch_idx=1):
@@ -257,6 +257,7 @@ def solve(self, dataloader, epoch_idx=1):
         # only save the unique path
         image_info_dicts = {}
         for images_path, labels_path in zip(self.images_paths, self.labels_paths):
+            # TODO: get the matching id for YOLO-format data
            _, image_info_dict = create_image_metadata(labels_path)
             modified_dict = {f"{images_path/key}": value for key, value in image_info_dict.items()}
             image_info_dicts.update(modified_dict)
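Patch 10 below guards GradScaler behind a device check so CPU runs fall back to a plain backward pass. The pattern in isolation (a sketch; note the patch compares the device against the string 'cuda', which can miss spellings like 'cuda:0', so the device.type check used here is one way around that):

    import torch
    from torch.cuda.amp import GradScaler

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Enable AMP loss scaling only on CUDA.
    scaler = GradScaler() if device.type == "cuda" else None

    def optimizer_step(loss, optimizer, model):
        if scaler:
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
            optimizer.step()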
From 06ecb3f791875c7528f5fda00e320ad4e05b4572 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Mon, 19 Aug 2024 07:03:46 +0000
Subject: [PATCH 10/13]
 add: support YOLO-format datasets during validation, without the extra AP
  computation from predict.json
 fix: GradScaler should only be used on GPU
 update: rework the predict.json AP computation logic

---
 yolo/tools/data_loader.py |  6 +--
 yolo/tools/solver.py      | 81 +++++++++++++++++++++------------------
 yolo/utils/model_utils.py |  6 ++-
 3 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py
index 9d1726c3..23720789 100644
--- a/yolo/tools/data_loader.py
+++ b/yolo/tools/data_loader.py
@@ -74,12 +74,12 @@ def load_data(self, images_path: Path, labels_path: Path, phase_name: str):
         cache_path = images_path.with_suffix(".cache")
 
         if not cache_path.exists():
-            logger.info("🏭 Generating {} cache", phase_name)
             data = self.filter_data(images_path, labels_path, phase_name)
+            logger.info("🏭 Generating {} cache, containing {} samples", phase_name, len(data))
             torch.save(data, cache_path)
         else:
             data = torch.load(cache_path)
-            logger.info("📦 Loaded {} cache", phase_name)
+            logger.info("📦 Loaded {} cache, containing {} samples", phase_name, len(data))
         # TODO: re-enable cache validation
         # if data[0][0].parent == Path("images")/phase_name:
         #     logger.info("✅ Cache validation successful")
         # else:
         #     logger.warning("⚠️ Cache validation failed, regenerating")
         #     data = self.filter_data(images_path, labels_path, phase_name)
         #     torch.save(data, cache_path)
@@ -333,4 +333,4 @@ def stop(self):
             self.thread.join(timeout=1)
 
     def __len__(self):
-        return self.queue.qsize() if not self.is_stream else 0
+        return self.queue.qsize() if not self.is_stream else 0
\ No newline at end of file
diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py
index 30bfccb7..b5195eb4 100644
--- a/yolo/tools/solver.py
+++ b/yolo/tools/solver.py
@@ -58,13 +58,13 @@ def __init__(self, cfg: Config, model: YOLO, vec2box: Vec2Box, progress: Progres
         self.validation_dataloader = create_dataloader(
             cfg.task.validation.data, cfg.dataset, cfg.task.validation.task, use_ddp
         )
-        self.validator = ModelValidator(cfg, model, vec2box, progress, device)
+        self.validator = ModelValidator(cfg, model, vec2box, progress, self.device)
 
-        if getattr(train_cfg.ema, "enabled", False):
-            self.ema = ExponentialMovingAverage(model, decay=train_cfg.ema.decay)
-        else:
-            self.ema = None
-        self.scaler = GradScaler()
+
+        self.ema = ExponentialMovingAverage(model, decay=train_cfg.ema.decay) if getattr(train_cfg.ema, "enabled", False) else None
+
+        self.scaler = GradScaler() if self.device == 'cuda' else None
+
 
     def train_one_batch(self, images: Tensor, targets: Tensor):
         images, targets = images.to(self.device), targets.to(self.device)
@@ -76,11 +76,16 @@ def train_one_batch(self, images: Tensor, targets: Tensor):
             main_predicts = self.vec2box(predicts["Main"])
             loss, loss_item = self.loss_fn(aux_predicts, main_predicts, targets)
 
-        self.scaler.scale(loss).backward()
-        self.scaler.unscale_(self.optimizer)
-        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)
-        self.scaler.step(self.optimizer)
-        self.scaler.update()
+        if self.scaler:
+            self.scaler.scale(loss).backward()
+            self.scaler.unscale_(self.optimizer)
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)
+            self.scaler.step(self.optimizer)
+            self.scaler.update()
+        else:
+            loss.backward()
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)
+            self.optimizer.step()
 
         return loss_item
 
@@ -223,6 +228,7 @@ def __init__(
         self.progress = progress
 
         self.post_proccess = PostProccess(vec2box, cfg.task.validation.nms)
+        # TODO: YOLO-format datasets don't need this step
         self.json_path = self.progress.save_path / "predict.json"
 
         with contextlib.redirect_stdout(io.StringIO()):
@@ -243,11 +249,11 @@ def __init__(
                 images_paths = list(images_paths)
             self.images_paths = [Path(cfg.dataset.path) / images_path for images_path in images_paths]
 
             # get the gt label
-            json_paths = [locate_label_paths(labels_path, cfg.dataset.get("validation", "validation")) \
+            self.labels_infos = [locate_label_paths(labels_path, cfg.dataset.get("validation", "validation")) \
                             for labels_path in self.labels_paths]
 
             # merge the COCO objects; duplicate data is not merged
-            coco_objects = [COCO(json_path[0]) for json_path in json_paths if json_path[0] and json_path[0]=="json"]
+            coco_objects = [COCO(json_path[0]) for json_path in self.labels_infos if json_path[0] and json_path[1]=="json"]
             self.coco_gt = merge_coco_objects(coco_objects)
 
@@ -256,19 +262,20 @@ def solve(self, dataloader, epoch_idx=1):
         predict_json, mAPs = [], defaultdict(list)
         # only save the unique path
         image_info_dicts = {}
-        for images_path, labels_path in zip(self.images_paths, self.labels_paths):
-            # TODO: get the matching id for YOLO-format data
-            _, image_info_dict = create_image_metadata(labels_path)
-            modified_dict = {f"{images_path/key}": value for key, value in image_info_dict.items()}
-            image_info_dicts.update(modified_dict)
+        for images_path, labels_info in zip(self.images_paths, self.labels_infos):
+            # only COCO data needs the extra AP computation
+            if labels_info[1] == "json":
+                _, image_info_dict = create_image_metadata(labels_info[0])
+                modified_dict = {f"{images_path/key}": value for key, value in image_info_dict.items()}
+                image_info_dicts.update(modified_dict)
 
         self.progress.start_one_epoch(len(dataloader), task="Validate")
         for batch_size, images, targets, rev_tensor, img_paths in dataloader:
             images_path = dataloader.dataset.images_paths
             images, targets, rev_tensor = images.to(self.device), targets.to(self.device), rev_tensor.to(self.device)
             with torch.no_grad():
-                predicts = self.model(images)
-                predicts = self.post_proccess(predicts)
+                output = self.model(images)
+                predicts = self.post_proccess(output)
             for idx, predict in enumerate(predicts):
                 mAP = calculate_map(predict, targets[idx])
                 for mAP_key, mAP_val in mAP.items():
                     mAPs[mAP_key].append(mAP_val)
 
             avg_mAPs = {key: torch.mean(torch.stack(val)) for key, val in mAPs.items()}
             self.progress.one_batch(avg_mAPs)
-
-            predict_json.extend(predicts_to_json(img_paths, image_info_dicts, predicts, rev_tensor))
+            if image_info_dicts:
+                predict_json.extend(predicts_to_json(img_paths, image_info_dicts, predicts, rev_tensor))
         self.progress.finish_one_epoch(avg_mAPs, epoch_idx=epoch_idx)
         self.progress.visualize_image(images, targets, predicts, epoch_idx=epoch_idx)
 
-        with open(self.json_path, "w") as f:
-            predict_json = collect_prediction(predict_json, self.progress.local_rank)
-            if self.progress.local_rank != 0:
-                return
-            json.dump(predict_json, f)
-
-        # skipped for yolo-format datasets or when there are no results
-        if predict_json and self.coco_gt:
-            self.progress.start_pycocotools()
-            result = calculate_ap(self.coco_gt, predict_json)
-            self.progress.finish_pycocotools(result, epoch_idx)
+        if predict_json:
+            with open(self.json_path, "w") as f:
+                predict_json = collect_prediction(predict_json, self.progress.local_rank)
+                if self.progress.local_rank == 0:
+                    json.dump(predict_json, f)
+
+                    if hasattr(self, "coco_gt"):
+                        self.progress.start_pycocotools()
+                        result = calculate_ap(self.coco_gt, predict_json)
+                        self.progress.finish_pycocotools(result, epoch_idx)
+                    else:
+                        logger.warning("⚠️ COCO ground truth not found. Please check dataset configuration.")
+                else:
+                    return
         else:
-            if not predict_json:
-                logger.warning("⚠️ No predictions available for evaluation.")
-            if not hasattr(self, "coco_gt"):
-                logger.warning("⚠️ COCO ground truth not found. Please check dataset configuration.")
+            logger.warning("⚠️ No predictions available for evaluation.")
 
         return avg_mAPs
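calculate_ap itself is untouched by this patch; for readers unfamiliar with it, the standard pycocotools evaluation sequence it wraps looks like this (a sketch of ordinary pycocotools usage, with illustrative file names, not code from this repository):

    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    coco_gt = COCO("annotations/instances_val.json")  # ground truth (illustrative path)
    coco_dt = coco_gt.loadRes("predict.json")         # detections in COCO result format

    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()  # prints the AP/AR table; coco_eval.stats holds the numbers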
diff --git a/yolo/utils/model_utils.py b/yolo/utils/model_utils.py
index 990de5e8..3f456d0b 100644
--- a/yolo/utils/model_utils.py
+++ b/yolo/utils/model_utils.py
@@ -172,8 +172,12 @@ def predicts_to_json(img_paths, image_id_map, predicts, rev_tensor):
         bboxes[:, 1:5] = (bboxes[:, 1:5] - shift[None]) / scale[None]
         bboxes[:, 1:5] = transform_bbox(bboxes[:, 1:5], "xyxy -> xywh")
         for cls, *pos, conf in bboxes:
+            img_path_map = image_id_map.get(str(img_path))
+            # skip images that are not in the map
+            if img_path_map is None:
+                continue
             bbox = {
-                "image_id": image_id_map[str(img_path)]["id"],
+                "image_id": img_path_map["id"],
                 "category_id": IDX_TO_ID[int(cls)],
                 "bbox": [float(p) for p in pos],
                 "score": float(conf),

From ca8c88f8519356348dca0eaf5fc2226a406b15eb Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Wed, 21 Aug 2024 07:23:48 +0000
Subject: [PATCH 11/13] fix dataloader tests

---
 tests/test_tools/test_data_loader.py | 2 +-
 tests/test_tools/test_solver.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_tools/test_data_loader.py b/tests/test_tools/test_data_loader.py
index 0cfb9c65..005d8ed2 100644
--- a/tests/test_tools/test_data_loader.py
+++ b/tests/test_tools/test_data_loader.py
@@ -12,7 +12,7 @@ def test_create_dataloader_cache(train_cfg: Config):
     train_cfg.task.data.shuffle = False
     train_cfg.task.data.batch_size = 2
 
-    cache_file = Path("tests/data/train.cache")
+    cache_file = Path("tests/data/images/train.cache")
     cache_file.unlink(missing_ok=True)
 
     make_cache_loader = create_dataloader(train_cfg.task.data, train_cfg.dataset)
diff --git a/tests/test_tools/test_solver.py b/tests/test_tools/test_solver.py
index 217f6877..822d991c 100644
--- a/tests/test_tools/test_solver.py
+++ b/tests/test_tools/test_solver.py
@@ -17,7 +17,7 @@
 @pytest.fixture
 def model_validator(validation_cfg: Config, model: YOLO, vec2box: Vec2Box, validation_progress_logger, device):
     validator = ModelValidator(
-        validation_cfg.task, validation_cfg.dataset, model, vec2box, validation_progress_logger, device
+        validation_cfg, model, vec2box, validation_progress_logger, device
     )
     return validator

From 9eb2126c0c2fe04c83c33fc0e8bf78539eac67ab Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Wed, 21 Aug 2024 07:27:47 +0000
Subject: [PATCH 12/13]
 1. add the code from #76
 2. optimize the AP calculation and support more AP metrics
 3. 
fix the choice of mode --- yolo/config/task/train.yaml | 1 + yolo/tools/solver.py | 3 +- yolo/utils/bounding_box_utils.py | 138 +++++++++++++++++++++++++++---- 3 files changed, 123 insertions(+), 19 deletions(-) diff --git a/yolo/config/task/train.yaml b/yolo/config/task/train.yaml index d3eab6cf..ff5ffbac 100644 --- a/yolo/config/task/train.yaml +++ b/yolo/config/task/train.yaml @@ -1,4 +1,5 @@ task: train +mode: detection defaults: - validation: ../validation diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py index b5195eb4..3ca292fa 100644 --- a/yolo/tools/solver.py +++ b/yolo/tools/solver.py @@ -279,7 +279,8 @@ def solve(self, dataloader, epoch_idx=1): for idx, predict in enumerate(predicts): mAP = calculate_map(predict, targets[idx]) for mAP_key, mAP_val in mAP.items(): - mAPs[mAP_key].append(mAP_val) + if mAP_key != "class_mAP": + mAPs[mAP_key].append(mAP_val) avg_mAPs = {key: torch.mean(torch.stack(val)) for key, val in mAPs.items()} self.progress.one_batch(avg_mAPs) diff --git a/yolo/utils/bounding_box_utils.py b/yolo/utils/bounding_box_utils.py index 12d95c58..ab24837d 100644 --- a/yolo/utils/bounding_box_utils.py +++ b/yolo/utils/bounding_box_utils.py @@ -13,6 +13,18 @@ def calculate_iou(bbox1, bbox2, metrics="iou") -> Tensor: + """ + Calculate Intersection over Union (IoU) between two sets of bounding boxes. + + Args: + bbox1 (Tensor): First set of bounding boxes. Shape: [A, 4] or [B, A, 4] or [B, Z, A, 4] + bbox2 (Tensor): Second set of bounding boxes. Shape: [B, 4] or [B, A, 4] or [B, Z, A, 4] + metrics (str, optional): IoU metric to use. Default: "iou". + + Returns: + Tensor: IoU scores between each pair of bounding boxes. Shape: [B, A, B] or [B, Z, A, B] + """ + metrics = metrics.lower() EPS = 1e-9 dtype = bbox1.dtype @@ -76,6 +88,16 @@ def calculate_iou(bbox1, bbox2, metrics="iou") -> Tensor: def transform_bbox(bbox: Tensor, indicator="xywh -> xyxy"): + """ + Transform bounding boxes between different formats. + + Args: + bbox (Tensor): Input bounding boxes. Shape: [B, N, 4] + indicator (str, optional): Transformation indicator. Default: "xywh -> xyxy" + + Returns: + Tensor: Transformed bounding boxes. Shape: [B, N, 4] + """ data_type = bbox.dtype in_type, out_type = indicator.replace(" ", "").split("->") @@ -388,7 +410,7 @@ def bbox_nms(cls_dist: Tensor, bbox: Tensor, nms_cfg: NMSConfig, confidence: Opt valid_box = bbox[valid_mask.repeat(1, 1, 4)].view(-1, 4) batch_idx, *_ = torch.where(valid_mask) - nms_idx = batched_nms(valid_box, valid_cls, batch_idx, nms_cfg.min_iou) + nms_idx = batched_nms(valid_box, valid_con, batch_idx, nms_cfg.min_iou) predicts_nms = [] for idx in range(cls_dist.size(0)): instance_idx = nms_idx[idx == batch_idx[nms_idx]] @@ -401,29 +423,73 @@ def bbox_nms(cls_dist: Tensor, bbox: Tensor, nms_cfg: NMSConfig, confidence: Opt return predicts_nms -def calculate_map(predictions, ground_truths, iou_thresholds=arange(0.5, 1, 0.05)) -> Dict[str, Tensor]: - # TODO: Refactor this block, Flexible for calculate different mAP condition? +def calculate_map( + predictions: Tensor, + ground_truths: Tensor, + iou_thresholds: Union[List[float], Tensor] = torch.arange(0.5, 1.0, 0.05), + class_metrics: bool = True, +) -> Dict[str, Union[Tensor, Dict[int, Tensor]]]: + """ + Calculate Mean Average Precision (mAP) for object detection. + + Args: + predictions (Tensor): Predicted bounding boxes and scores. Shape: [N, 6] (class, x1, y1, x2, y2, confidence) + ground_truths (Tensor): Ground truth bounding boxes. 
Shape: [M, 5] (class, x1, y1, x2, y2)
+        iou_thresholds (Union[List[float], Tensor]): IoU thresholds for mAP calculation. Default: [0.5, 0.55, ..., 0.95]
+        class_metrics (bool): Whether to calculate class-wise mAP. Default: True
+
+    Returns:
+        Dict[str, Union[Tensor, Dict[int, Tensor]]]: A dictionary containing mAP scores:
+            - mAP.XX: mAP at IoU threshold XX
+            - mAP.5:.95: mean mAP over IoU thresholds 0.5 to 0.95
+            - class_mAP: class-wise mAP scores (if class_metrics is True)
+    """
     device = predictions.device
     n_preds = predictions.size(0)
     n_gts = (ground_truths[:, 0] != -1).sum()
+
+    # handle the case of no predictions
+    if n_preds == 0:
+        mAP = {f"mAP.{int(threshold*100)}": torch.tensor(0.0, device=device) for threshold in iou_thresholds}
+        mAP["mAP.5:.95"] = torch.tensor(0.0, device=device)
+
+        if class_metrics:
+            unique_classes = torch.unique(ground_truths[:n_gts, 0])
+            class_mAP = {int(cls.item()): {f"mAP.{int(threshold*100)}": torch.tensor(0.0, device=device)
+                                           for threshold in iou_thresholds} for cls in unique_classes}
+            for cls in class_mAP:
+                class_mAP[cls]["mAP.5:.95"] = torch.tensor(0.0, device=device)
+            mAP["class_mAP"] = class_mAP
+        logger.info("🧸 Found no predictions")
+        return mAP
+
+    # remove the padded data
     ground_truths = ground_truths[:n_gts]
+
+    ious = calculate_iou(predictions[:, 1:-1], ground_truths[:, 1:1+4])  # [n_preds, n_gts]
+
+    if isinstance(iou_thresholds, list):
+        iou_thresholds = torch.tensor(iou_thresholds, device=device)
+
     aps = []
-
-    ious = calculate_iou(predictions[:, 1:-1], ground_truths[:, 1:])  # [n_preds, n_gts]
+    class_aps = {} if class_metrics else None
 
     for threshold in iou_thresholds:
         tp = torch.zeros(n_preds, device=device, dtype=bool)
+        # get the max iou and the index of the max iou
         max_iou, max_indices = ious.max(dim=1)
+        # get the indices where the max iou is above the threshold
         above_threshold = max_iou >= threshold
+        # match the class
         matched_classes = predictions[:, 0] == ground_truths[max_indices, 0]
-        max_match = torch.zeros_like(ious)
-        max_match[arange(n_preds), max_indices] = max_iou
-        if max_match.size(0):
-            tp[max_match.argmax(dim=0)] = True
-        tp[~above_threshold | ~matched_classes] = False
+
+        valid_matches = above_threshold & matched_classes
 
-        _, indices = torch.sort(predictions[:, 1], descending=True)
+        tp[valid_matches] = True
+
+        # order by confidence
+        _, indices = torch.sort(predictions[:, -1], descending=True)
         tp = tp[indices]
 
         tp_cumsum = torch.cumsum(tp, dim=0)
@@ -432,17 +498,53 @@ def calculate_map(predictions, ground_truths, iou_thresholds=arange(0.5, 1, 0.05
         precision = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-6)
         recall = tp_cumsum / (n_gts + 1e-6)
 
+        # add the first and last points to ensure correct calculation of AP
         precision = torch.cat([torch.ones(1, device=device), precision, torch.zeros(1, device=device)])
         recall = torch.cat([torch.zeros(1, device=device), recall, torch.ones(1, device=device)])
 
-        precision, _ = torch.cummax(precision.flip(0), dim=0)
-        precision = precision.flip(0)
+        # calculate the smoothed precision
+        precision = torch.cummax(precision.flip(0), dim=0)[0].flip(0)
 
-        ap = torch.trapezoid(precision, recall)
+        ap = torch.trapz(precision, recall)
         aps.append(ap)
 
-    mAP = {
-        "mAP.5": aps[0],
-        "mAP.5:.95": torch.mean(torch.stack(aps)),
-    }
+        if class_metrics:
+            for cls in torch.unique(ground_truths[:, 0]):
+                cls_mask = predictions[:, 0] == cls
+                if cls_mask.sum() == 0:
+                    continue
+                cls_tp = tp[cls_mask]
+                cls_fp = ~cls_tp
+                cls_gt_count = (ground_truths[:, 0] == cls).sum()
+
+                cls_precision = torch.cumsum(cls_tp, dim=0) / (torch.cumsum(cls_tp, dim=0) + torch.cumsum(cls_fp, dim=0) + 1e-6)
+                cls_recall = torch.cumsum(cls_tp, dim=0) / (cls_gt_count + 1e-6)
+
+                cls_precision = torch.cat([torch.ones(1, device=device), cls_precision, torch.zeros(1, device=device)])
+                cls_recall = torch.cat([torch.zeros(1, device=device), cls_recall, torch.ones(1, device=device)])
+
+                cls_precision = torch.cummax(cls_precision.flip(0), dim=0)[0].flip(0)
+
+                cls_ap = torch.trapz(cls_precision, cls_recall)
+
+                if cls.item() not in class_aps:
+                    class_aps[int(cls.item())] = []
+                class_aps[int(cls.item())].append(cls_ap)
+
+    mAP = {}
+    for i, threshold in enumerate(iou_thresholds):
+        mAP[f"mAP.{int(threshold*100)}"] = aps[i]
+
+    # add mAP.5:.95
+    mAP["mAP.5:.95"] = torch.mean(torch.stack(aps))
+
+    if class_metrics:
+        class_mAP = {}
+        for cls in class_aps:
+            class_mAP[cls] = {}
+            for i, threshold in enumerate(iou_thresholds):
+                class_mAP[cls][f"mAP.{int(threshold*100)}"] = class_aps[cls][i]
+            class_mAP[cls]["mAP.5:.95"] = torch.mean(torch.stack(class_aps[cls]))
+        mAP["class_mAP"] = class_mAP
+
     return mAP
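The AP computation in patch 12 pads the precision-recall curve, makes precision monotonically non-increasing from the right, and integrates with torch.trapz. The same steps on a tiny hand-made example (a sketch):

    import torch

    # Three predictions already sorted by confidence: hit, miss, hit; two ground truths.
    tp = torch.tensor([1.0, 0.0, 1.0])
    fp = 1.0 - tp
    n_gts = 2

    precision = tp.cumsum(0) / (tp.cumsum(0) + fp.cumsum(0))
    recall = tp.cumsum(0) / n_gts

    # Pad the curve, smooth precision from the right, then integrate.
    precision = torch.cat([torch.ones(1), precision, torch.zeros(1)])
    recall = torch.cat([torch.zeros(1), recall, torch.ones(1)])
    precision = torch.cummax(precision.flip(0), dim=0)[0].flip(0)

    ap = torch.trapz(precision, recall)
    print(round(ap.item(), 4))  # 0.8333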
From 70c0b484a2ada214d2774aaee5fa715fee961a07 Mon Sep 17 00:00:00 2001
From: yongjian_zhang
Date: Thu, 22 Aug 2024 01:43:47 +0000
Subject: [PATCH 13/13] fix: NMS (#76); add: more test_calculate_map cases

---
 tests/test_utils/test_bounding_box_utils.py | 118 ++++++++++++++++----
 1 file changed, 98 insertions(+), 20 deletions(-)

diff --git a/tests/test_utils/test_bounding_box_utils.py b/tests/test_utils/test_bounding_box_utils.py
index 58a9a917..cdaede78 100644
--- a/tests/test_utils/test_bounding_box_utils.py
+++ b/tests/test_utils/test_bounding_box_utils.py
@@ -146,23 +146,62 @@ def test_anc2box_autoanchor(inference_v7_cfg: Config):
 
 def test_bbox_nms():
-    cls_dist = tensor(
-        [[[0.1, 0.7, 0.2], [0.6, 0.3, 0.1]], [[0.4, 0.4, 0.2], [0.5, 0.4, 0.1]]]  # Example class distribution
+    cls_dist = torch.tensor(
+        [
+            [
+                [0.7, 0.1, 0.2],  # High confidence, class 0
+                [0.3, 0.6, 0.1],  # High confidence, class 1
+                [-3.0, -2.0, -1.0],  # Low confidence, class 2
+                [0.6, 0.2, 0.2],  # Medium confidence, class 0
+            ],
+            [
+                [0.55, 0.25, 0.2],  # Medium confidence, class 0
+                [-4.0, -0.5, -2.0],  # Low confidence, class 1
+                [0.15, 0.2, 0.65],  # Medium confidence, class 2
+                [0.8, 0.1, 0.1],  # High confidence, class 0
+            ],
+        ],
+        dtype=float32,
     )
-    bbox = tensor(
-        [[[50, 50, 100, 100], [60, 60, 110, 110]], [[40, 40, 90, 90], [70, 70, 120, 120]]],  # Example bounding boxes
+    bbox = torch.tensor(
+        [
+            [
+                [0, 0, 160, 120],  # Overlaps with box 4
+                [160, 120, 320, 240],
+                [0, 120, 160, 240],
+                [16, 12, 176, 132],
+            ],
+            [
+                [0, 0, 160, 120],  # Overlaps with box 4
+                [160, 120, 320, 240],
+                [0, 120, 160, 240],
+                [16, 12, 176, 132],
+            ],
+        ],
         dtype=float32,
     )
     nms_cfg = NMSConfig(min_confidence=0.5, min_iou=0.5)
 
-    expected_output = [
-        tensor(
+    # Batch 1:
+    # - box 1 is kept with class 0 as it has a higher confidence than box 4, i.e. box 4 is filtered out
+    # - box 2 is kept with class 1
+    # - box 3 is rejected by the confidence filter
+    # Batch 2:
+    # - box 4 is kept with class 0 as it has a higher confidence than box 1, i.e. box 1 is filtered out
+    # - box 2 is rejected by the confidence filter
+    # - box 3 is kept with class 2
+    expected_output = torch.tensor(
+        [
             [
-                [1.0000, 50.0000, 50.0000, 100.0000, 100.0000, 0.6682],
-                [0.0000, 60.0000, 60.0000, 110.0000, 110.0000, 0.6457],
-            ]
-        )
-    ]
+                [0.0, 0.0, 0.0, 160.0, 120.0, 0.6682],
+                [1.0, 160.0, 120.0, 320.0, 240.0, 0.6457],
+            ],
+            [
+                [0.0, 16.0, 12.0, 176.0, 132.0, 0.6900],
+                [2.0, 0.0, 120.0, 160.0, 240.0, 0.6570],
+            ],
+        ]
+    )
 
     output = bbox_nms(cls_dist, bbox, nms_cfg)
 
@@ -171,13 +210,52 @@ def test_bbox_nms():
 
 def test_calculate_map():
-    predictions = tensor([[0, 60, 60, 160, 160, 0.5], [0, 40, 40, 120, 120, 0.5]])  # [class, x1, y1, x2, y2]
-    ground_truths = tensor([[0, 50, 50, 150, 150], [0, 30, 30, 100, 100]])  # [class, x1, y1, x2, y2]
-
-    mAP = calculate_map(predictions, ground_truths)
-
-    expected_ap50 = tensor(0.5)
-    expected_ap50_95 = tensor(0.2)
+    # set up test data
+    predictions = torch.tensor([
+        [0, 60, 60, 160, 160, 0.9],  # [class, x1, y1, x2, y2, confidence]
+        [0, 40, 40, 120, 120, 0.8],
+        [1, 10, 10, 70, 70, 0.7]
+    ])
+    ground_truths = torch.tensor([
+        [0, 50, 50, 150, 150],  # [class, x1, y1, x2, y2]
+        [1, 15, 15, 65, 65],
+        [0, 30, 30, 100, 100],
+    ])
+
+    # test the basic functionality
+    result = calculate_map(predictions, ground_truths)
+    assert "mAP.50" in result
+    assert "mAP.5:.95" in result
+    assert 0 <= result["mAP.50"] <= 1
+    assert 0 <= result["mAP.5:.95"] <= 1
+
+    # test class-level metrics
+    assert "class_mAP" in result
+    assert 0 in result["class_mAP"]
+    assert 1 in result["class_mAP"]
+
+    # test different IoU thresholds
+    custom_thresholds = [0.3, 0.5, 0.7]
+    result_custom = calculate_map(predictions, ground_truths, iou_thresholds=custom_thresholds)
+    assert "mAP.30" in result_custom
+    assert "mAP.50" in result_custom
+    assert "mAP.70" in result_custom
+
+    # test edge cases: empty predictions and empty ground truths
+    empty_predictions = torch.zeros((0, 6))
+    empty_result = calculate_map(empty_predictions, ground_truths)
+    assert empty_result["mAP.50"] == 0
+
+    empty_ground_truths = torch.zeros((0, 5))
+    empty_gt_result = calculate_map(predictions, empty_ground_truths)
+    assert empty_gt_result["mAP.50"] == 0
+
+    # test a perfect match
+    perfect_predictions = torch.tensor([
+        [0, 50, 50, 150, 150, 1.0],
+        [0, 30, 30, 100, 100, 1.0],
+        [1, 15, 15, 65, 65, 1.0]
+    ])
+    perfect_result = calculate_map(perfect_predictions, ground_truths)
+    assert pytest.approx(perfect_result["mAP.50"], 1e-6) == 1.0
 
-    assert isclose(mAP["mAP.5"], expected_ap50, atol=1e-5), f"AP50 mismatch"
-    assert isclose(mAP["mAP.5:.95"], expected_ap50_95, atol=1e-5), f"Mean AP mismatch"
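As a closing aside, the new return structure from calculate_map can be sanity-checked in a few lines (a sketch; assumes the yolo package from this repository is importable):

    import torch
    from yolo.utils.bounding_box_utils import calculate_map

    preds = torch.tensor([[0, 50, 50, 150, 150, 0.9]])
    gts = torch.tensor([[0, 50, 50, 150, 150]])

    result = calculate_map(preds, gts)
    # Expect per-threshold keys mAP.50 ... mAP.95, the mAP.5:.95 average,
    # and (with class_metrics=True, the default) a nested class_mAP dict.
    print(sorted(k for k in result if k != "class_mAP"))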