[#6] Handle any type of annotation file.

just-hjkwon · just-hjkwon · commit dc97f0a60bce · 2020-07-10T13:52:36.000+09:00
diff --git a/dataset/dataset.py b/dataset/dataset.py
@@ -7,7 +7,7 @@
 
 
 class DataSet(ABC):
-    def __init__(self, base_directory: str, target_extension: Union[str, List[str]]):
+    def __init__(self, base_directory: str, target_extension: Union[str, List[str]], anntoation_extension: Union[None, str]=None):
         self.base_directory = base_directory
 
         if type(target_extension) == list:
@@ -17,6 +17,11 @@ def __init__(self, base_directory: str, target_extension: Union[str, List[str]])
         else:
             raise ValueError("The target_extension should be the string of extension or list of it.")
 
+        if anntoation_extension is not None:
+            self.anntoation_extension = anntoation_extension
+        else:
+            self.anntoation_extension = "json"
+
         self.train_pairs = []
         self.validation_pairs = []
 
@@ -41,8 +46,8 @@ def prepare_pairs(self):
         train_file_list.sort()
         validation_file_list.sort()
 
-        self.train_pairs = DataSet.create_pairs_with_json(train_file_list)
-        self.validation_pairs = DataSet.create_pairs_with_json(validation_file_list)
+        self.train_pairs = DataSet.create_pairs_with_annotation(train_file_list, self.anntoation_extension)
+        self.validation_pairs = DataSet.create_pairs_with_annotation(validation_file_list, self.anntoation_extension)
 
     def set_train_mode(self):
         self.is_train_mode = True
@@ -75,14 +80,14 @@ def set_random_salt(self, random_salt):
         self.random_salt = 20200305 + random_salt
 
     @staticmethod
-    def create_pairs_with_json(file_list: list):
+    def create_pairs_with_annotation(file_list: list, annotatino_extension: str):
         pairs = []
 
         for file_path in file_list:
-            json_file_path = os.path.splitext(file_path)[0] + ".json"
+            annotation_file_path = os.path.splitext(file_path)[0] + ".%s" % annotatino_extension
 
-            if os.path.exists(json_file_path) is True:
-                pairs.append((file_path, json_file_path))
+            if os.path.exists(annotation_file_path) is True:
+                pairs.append((file_path, annotation_file_path))
             else:
                 continue
 
@@ -131,6 +136,11 @@ def validation_datum_filter(file_path: str):
     def extract_label(file_path: str):
         pass
 
+    @staticmethod
+    @abstractmethod
+    def parse_annotation(file_path: str):
+        pass
+
     @staticmethod
     @abstractmethod
     def is_valid_annotation(image_width, image_height, annotation):
diff --git a/dataset/image_dataset.py b/dataset/image_dataset.py
@@ -16,23 +16,22 @@ def create_valid_indices(self, pairs: list):
         description_prefix = "Checking validity: "
 
         tqdm_iterator = tqdm.tqdm(pairs, desc=description_prefix)
-        for index, (image_file_path, json_file_path) in enumerate(tqdm_iterator):
+        for index, (image_file_path, annotation_file_path) in enumerate(tqdm_iterator):
             tqdm_iterator.set_description(description_prefix + image_file_path)
 
             label = self.extract_label(image_file_path)
 
             image_width, image_height = imagesize.get(image_file_path)
 
-            with open(json_file_path) as file:
-                annotation = json.load(file)
+            annotation = self.parse_annotation(annotation_file_path)
 
-                if self.is_valid_annotation(image_width, image_height, annotation) is False:
-                    continue
+            if self.is_valid_annotation(image_width, image_height, annotation) is False:
+                continue
 
-                if label not in valid_indices.keys():
-                    valid_indices[label] = []
+            if label not in valid_indices.keys():
+                valid_indices[label] = []
 
-                valid_indices[label].append(index)
+            valid_indices[label].append(index)
 
         return valid_indices
 
@@ -44,23 +43,22 @@ def validation_count(self, label):
 
     def get_train_filename(self, label: Union[int, str], index: int):
         pair_index = self.train_valid_indices[label][index]
-        image_file_path, json_file_path = self.train_pairs[pair_index]
+        image_file_path, annotation_file_path = self.train_pairs[pair_index]
 
-        return image_file_path, json_file_path
+        return image_file_path, annotation_file_path
 
     def get_validation_filename(self, label: Union[int, str], index: int):
         pair_index = self.validation_valid_indices[label][index]
-        image_file_path, json_file_path = self.validation_pairs[pair_index]
+        image_file_path, annotation_file_path = self.validation_pairs[pair_index]
 
-        return image_file_path, json_file_path
+        return image_file_path, annotation_file_path
 
     def get_train_datum(self, label, index):
         pair_index = self.train_valid_indices[label][index]
 
-        image_file_path, json_file_path = self.train_pairs[pair_index]
+        image_file_path, annotation_file_path = self.train_pairs[pair_index]
 
-        with open(json_file_path) as json_file:
-            annotation = json.load(json_file)
+        annotation = self.parse_annotation(annotation_file_path)
 
         image = cv2.imread(image_file_path)
 
@@ -69,10 +67,9 @@ def get_train_datum(self, label, index):
     def get_validation_datum(self, label, index):
         pair_index = self.validation_valid_indices[label][index]
 
-        image_file_path, json_file_path = self.validation_pairs[pair_index]
+        image_file_path, annotation_file_path = self.validation_pairs[pair_index]
 
-        with open(json_file_path) as json_file:
-            annotation = json.load(json_file)
+        annotation = self.parse_annotation(annotation_file_path)
 
         image = cv2.imread(image_file_path)
 
diff --git a/dataset/movie_dataset.py b/dataset/movie_dataset.py
@@ -5,7 +5,6 @@
 from typing import Union
 
 import cv2
-import json
 import tqdm
 
 
@@ -16,7 +15,7 @@ def create_valid_indices(self, pairs: list):
         description_prefix = "Checking validity: "
 
         tqdm_iterator = tqdm.tqdm(pairs, desc=description_prefix)
-        for video_index, (video_file_path, json_file_path) in enumerate(tqdm_iterator):
+        for video_index, (video_file_path, annotation_file_path) in enumerate(tqdm_iterator):
             tqdm_iterator.set_description(description_prefix + video_file_path)
 
             label = self.extract_label(video_file_path)
@@ -28,14 +27,13 @@ def create_valid_indices(self, pairs: list):
 
             valid_frame_indices = []
 
-            with open(json_file_path) as file:
-                annotations = json.load(file)
+            annotations = self.parse_annotation(annotation_file_path)
 
-                for frame_index, annotation in enumerate(annotations):
-                    if self.is_valid_annotation(video_width, video_height, annotation) is False:
-                        continue
+            for frame_index, annotation in enumerate(annotations):
+                if self.is_valid_annotation(video_width, video_height, annotation) is False:
+                    continue
 
-                    valid_frame_indices.append(frame_index)
+                valid_frame_indices.append(frame_index)
 
             if len(valid_frame_indices) == 0:
                 continue
@@ -57,17 +55,17 @@ def get_train_filename(self, label: Union[int, str], index: int):
         video_indices = sorted(list(self.train_valid_indices[label].keys()))
         video_index = video_indices[index]
 
-        movie_file_path, json_file_path = self.train_pairs[video_index]
+        movie_file_path, annotation_file_path = self.train_pairs[video_index]
 
-        return movie_file_path, json_file_path
+        return movie_file_path, annotation_file_path
 
     def get_validation_filename(self, label: Union[int, str], index: int):
         video_indices = sorted(list(self.validation_valid_indices[label].keys()))
         video_index = video_indices[index]
 
-        movie_file_path, json_file_path = self.validation_pairs[video_index]
+        movie_file_path, annotation_file_path = self.validation_pairs[video_index]
 
-        return movie_file_path, json_file_path
+        return movie_file_path, annotation_file_path
 
     def get_train_datum(self, label, index):
         video_indices = sorted(list(self.train_valid_indices[label].keys()))
@@ -76,11 +74,10 @@ def get_train_datum(self, label, index):
         random.seed(None)
         frame_index = random.choice(self.train_valid_indices[label][video_index])
 
-        movie_file_path, json_file_path = self.train_pairs[video_index]
+        movie_file_path, annotation_file_path = self.train_pairs[video_index]
 
-        with open(json_file_path) as json_file:
-            annotations = json.load(json_file)
-            annotation = annotations[frame_index]
+        annotations = self.parse_annotation(annotation_file_path)
+        annotation = annotations[frame_index]
 
         video = cv2.VideoCapture(movie_file_path)
         video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
@@ -96,11 +93,10 @@ def get_validation_datum(self, label, index):
         random.seed(self.random_salt + index)
         frame_index = random.choice(self.validation_valid_indices[label][video_index])
 
-        movie_file_path, json_file_path = self.validation_pairs[video_index]
+        movie_file_path, annotation_file_path = self.validation_pairs[video_index]
 
-        with open(json_file_path) as json_file:
-            annotations = json.load(json_file)
-            annotation = annotations[frame_index]
+        annotations = self.parse_annotation(annotation_file_path)
+        annotation = annotations[frame_index]
 
         video = cv2.VideoCapture(movie_file_path)
         video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)