Project-AgML · yuvrajvirk · Mar 29, 2022 · Aug 20, 2022
diff --git a/agml/_assets/public_datasources.json b/agml/_assets/public_datasources.json
@@ -1043,5 +1043,25 @@
                 0.25335168838500977
             ]
         }
+    },
+    "sugarbeet_weed_segmentation_europe": {
+        "classes": {
+            "1": "background",
+            "2": "sugarbeet",
+            "3": "weed"
+        },
+        "ml_task": "semantic_segmentation",
+        "ag_task": "weed_segmentation",
+        "location": {
+            "continent": "Europe",
+            "country": "Switzerland and Germany"
+        },
+        "sensor_modality": "multispectral",
+        "platform": "uav",
+        "input_data_format": "png",
+        "annotation_format": "image",
+        "n_images": "11971",
+        "docs_url": "https://projects.asl.ethz.ch/datasets/doku.php?id=weedmap:remotesensing2018weedmap#dataset_summary",
+        "external_image_sources": ["rgb-images", "g-images", "b-images", "cir-images", "ndvi-images", "nir-images", "re-images", "binary_masks-images"]
     }
 }
diff --git a/agml/_assets/source_citations.json b/agml/_assets/source_citations.json
@@ -114,5 +114,9 @@
     "white_grapes_and_leaves_segmentation": {
         "license": "",
         "citation": "@inproceedings{kalampokas2020semantic,\n  title={Semantic segmentation of vineyard images using convolutional neural networks},\n  author={Kalampokas, Theofanis and Tziridis, Konstantinos and Nikolaou, Alexandros and Vrochidou, Eleni and Papakostas, George A and Pachidis, Theodore and Kaburlasos, Vassilis G},\n  booktitle={International Conference on Engineering Applications of Neural Networks},\n  pages={292--303},\n  year={2020},\n  organization={Springer}}"
+    },
+    "sugarbeet_weed_segmentation_europe": {
+        "license": "CC BY-SA 4.0",
+        "citation": "@ARTICLE{weedMap-2018, \n  author={I. Sa and M. Popovic and R. Khanna and Z. Chen and P. Lottes and F. Liebisch and J. Nieto and C. Stachniss and A. Walter and R. Siegwart}, \n  journal={MDPI Remote Sensing}, \n  title={WeedMap: A large-scale semantic weed mapping framework using aerial multispectral imaging and deep neural network for precision farming}, \n  year={2018}, \n  volume={10}, \n  number={9}, \n  doi={10.3390/rs10091423}, \n  month={Aug}}"
     }
 }
diff --git a/agml/_internal/preprocess.py b/agml/_internal/preprocess.py
@@ -39,8 +39,9 @@
 from agml.utils.data import load_public_sources
 from agml._internal.process_utils import (
     read_txt_file, get_image_info, get_label2id,
-    convert_bbox_to_coco, get_coco_annotation_from_obj,
-    convert_xmls_to_cocojson, move_segmentation_dataset,
+    convert_bbox_to_coco, get_coco_annotation_from_obj, convert_xmls_to_cocojson,
+    mask_annotation_per_bbox, move_segmentation_dataset,
+    create_sub_masks, create_sub_mask_annotation_per_bbox, rgb2mask
 )
 
 
@@ -869,22 +870,22 @@ def wheat_head_counting(self, dataset_name):
         label2id = {"Wheat Head": 1}
         dataset_dir = os.path.join(self.data_original_dir, dataset_name)
         anno_files = [os.path.join(dataset_dir, 'competition_train.csv'), os.path.join(dataset_dir, 'competition_test.csv'), os.path.join(dataset_dir, 'competition_val.csv')]
-        
+
         annotations = []
         for anno_file in anno_files:
-          with open(anno_file, 'r') as file:
-              reader = csv.reader(file)
-              for row in reader:
-                  img_path = os.path.join(dataset_dir, "images", row[0])
-                  anno = [img_path]
-                  bboxs = row[1].split(";")
-                  anno.append(len(bboxs))
-                  for bbox in bboxs:
-                      if bbox != "no_box":
-                        bbox = bbox.split(" ")
-                        bbox.append("1")
-                        anno.append(bbox)
-                  annotations.append(anno)
+            with open(anno_file, 'r') as file:
+                reader = csv.reader(file)
+                for row in reader:
+                    img_path = os.path.join(dataset_dir, "images", row[0])
+                    anno = [img_path]
+                    bboxs = row[1].split(";")
+                    anno.append(len(bboxs))
+                    for bbox in bboxs:
+                        if bbox != "no_box":
+                            bbox = bbox.split(" ")
+                            bbox.append("1")
+                            anno.append(bbox)
+                    annotations.append(anno)
 
         # Define path to processed annotation files
         output_json_file = os.path.join(
@@ -893,7 +894,7 @@ def wheat_head_counting(self, dataset_name):
         # Create directory for processed image files
         output_img_path = os.path.join(
             self.data_processed_dir, dataset_name, 'images')
-        create_dir(output_img_path) 
+        create_dir(output_img_path)
 
         general_info = {
             "description": "Global Wheat Head Detection (GWHD) dataset",
@@ -909,40 +910,108 @@ def wheat_head_counting(self, dataset_name):
             output_img_path, general_info, resize=512/1024)
 
     def peachpear_flower_segmentation(self, dataset_name):
-      # Create processed directories 
-      processed_dir = os.path.join(self.data_processed_dir, dataset_name)
-      os.makedirs(processed_dir, exist_ok = True)
-      processed_image_dir = os.path.join(processed_dir, 'images')
-      os.makedirs(processed_image_dir, exist_ok = True)
-      processed_annotation_dir = os.path.join(processed_dir, 'annotations')
-      os.makedirs(processed_annotation_dir, exist_ok = True)
-
-      dataset_dir = os.path.join(self.data_original_dir, dataset_name)
-
-      # Get image files
-      img_dirs = ["Peach", "Pear"]
-      img_paths = []
-      for img_dir in img_dirs:
-        img_paths += [os.path.join(dataset_dir, img_dir, file_name) for file_name in get_file_list(os.path.join(dataset_dir, img_dir))]
-
-      # Save all images as jpg in processed directory
-      for img_path in img_paths:
-        processed_path = os.path.join(processed_image_dir, img_path.split('/')[-1].replace('.bmp', '.jpg'))
-        img = cv2.imread(img_path)
-        cv2.imwrite(processed_path, img)
-
-      # Get annotation files
-      anno_dirs = ["PeachLabels", "PearLabels"]
-      anno_paths = []
-      for anno_dir in anno_dirs:
-        anno_paths += [os.path.join(dataset_dir, anno_dir, file_name) for file_name in get_file_list(os.path.join(dataset_dir, anno_dir))]
-
-      # Transform mask and save to processed directory
-      for anno_path in anno_paths:
-        img = cv2.imread(anno_path, cv2.IMREAD_GRAYSCALE)
-        img = np.where(img[:] == 255, 1, 0)
-        processed_path = os.path.join(processed_annotation_dir, anno_path.split('/')[-1])
-        cv2.imwrite(processed_path, img)
+        # Create the processed dataset directory with `images` and `annotations` subdirectories
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        os.makedirs(processed_dir, exist_ok = True)
+        processed_image_dir = os.path.join(processed_dir, 'images')
+        os.makedirs(processed_image_dir, exist_ok = True)
+        processed_annotation_dir = os.path.join(processed_dir, 'annotations')
+        os.makedirs(processed_annotation_dir, exist_ok = True)
+
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+
+        # Collect the image files from the `Peach` and `Pear` directories
+        img_dirs = ["Peach", "Pear"]
+        img_paths = []
+        for img_dir in img_dirs:
+            img_paths += [os.path.join(dataset_dir, img_dir, file_name) for file_name in get_file_list(os.path.join(dataset_dir, img_dir))]
+
+        # Re-save every image in the processed directory, renaming `.bmp` files to `.jpg`
+        for img_path in img_paths:
+            processed_path = os.path.join(processed_image_dir, img_path.split('/')[-1].replace('.bmp', '.jpg'))
+            img = cv2.imread(img_path)
+            cv2.imwrite(processed_path, img)
+
+        # Collect the label masks from the `PeachLabels` and `PearLabels` directories
+        anno_dirs = ["PeachLabels", "PearLabels"]
+        anno_paths = []
+        for anno_dir in anno_dirs:
+            anno_paths += [os.path.join(dataset_dir, anno_dir, file_name) for file_name in get_file_list(os.path.join(dataset_dir, anno_dir))]
+
+        # Map label value 255 to 1 and every other value to 0, then save under the same file name
+        for anno_path in anno_paths:
+            img = cv2.imread(anno_path, cv2.IMREAD_GRAYSCALE)
+            img = np.where(img[:] == 255, 1, 0)
+            processed_path = os.path.join(processed_annotation_dir, anno_path.split('/')[-1])
+            cv2.imwrite(processed_path, img)
+
+    def sugarbeet_weed_segmentation_europe(self, dataset_name):
+        """Preprocess the sugarbeet/weed tile dataset into the processed layout.
+
+        Every file found under a channel directory inside `Tiles` is copied to a
+        per-channel output directory (the `R` channel goes to `images`), and each
+        `*_color.png` file under a `groundtruth` directory is converted into an
+        index mask written to `annotations`.
+        """
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+        tiles_dir = os.path.join(dataset_dir, 'Tiles')
+
+        # Source channel directory -> output directory in the processed dataset.
+        channel_dirs = {
+            'RGB': 'rgb-images', 'R': 'images', 'G': 'g-images', 'B': 'b-images',
+            'CIR': 'cir-images', 'NDVI': 'ndvi-images', 'NIR': 'nir-images',
+            'RE': 're-images', 'mask': 'binary_masks-images',
+        }
+        # Output directory -> (source path, unique output file name) pairs.
+        channel_files = {out_dir: [] for out_dir in channel_dirs.values()}
+        rgb_masks = []
+
+        # Get image paths for each type of image
+        for root, _, files in os.walk(tiles_dir):
+            # Split on the platform separator instead of a hard-coded '/', so the
+            # directory matching below also works with Windows-style paths.
+            parts = os.path.normpath(root).split(os.sep)
+            dir_ = parts[-1]
+            if dir_ == 'groundtruth':
+                # Only the color-coded ground truth images are converted to masks.
+                rgb_masks.extend(
+                    (os.path.join(root, file), file) for file in sorted(files)
+                    if file.split('_')[-1] == 'color.png')
+            elif dir_ in channel_dirs:
+                # Build the output name by prefixing the file with an ancestor
+                # directory name (presumably to keep names unique): the parent
+                # for `mask`, otherwise the directory two levels up.
+                prefix = parts[-2] if dir_ == 'mask' else parts[-3]
+                channel_files[channel_dirs[dir_]].extend(
+                    (os.path.join(root, file), prefix + file) for file in sorted(files))
+
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        os.makedirs(processed_dir, exist_ok=True)
+        processed_annotation_dir = os.path.join(processed_dir, 'annotations')
+        os.makedirs(processed_annotation_dir, exist_ok=True)
+
+        # Copy every channel image into its own directory under the new name.
+        for out_dir, entries in channel_files.items():
+            processed_image_dir = os.path.join(processed_dir, out_dir)
+            os.makedirs(processed_image_dir, exist_ok=True)
+            for src_path, unique_name in entries:
+                shutil.copyfile(src_path, os.path.join(processed_image_dir, unique_name))
+
+        # cv2.imread returns channels in BGR order, so the keys are (b, g, r).
+        color2index = {
+            (0, 0, 0): 0,  # black is background
+            (0, 255, 0): 1,  # green is sugarbeet
+            (0, 0, 255): 2,  # red is weed
+        }
+
+        for mask_path, mask_file in rgb_masks:
+            rgb_mask_img = cv2.imread(mask_path)
+            index_mask = rgb2mask(rgb_mask_img, color2index)
+            # Join the first two '_'-separated fields of the ground truth name.
+            name_fields = mask_file.split('_')
+            mask_name = name_fields[0] + name_fields[1] + ".png"
+            anno_out = os.path.join(processed_annotation_dir, mask_name)
+            cv2.imwrite(anno_out, index_mask)
 
 
 if __name__ == '__main__':

diff --git a/agml/_internal/process_utils.py b/agml/_internal/process_utils.py
@@ -568,3 +568,31 @@ def move_segmentation_dataset(
             shutil.copyfile(orig_annotation_path, out_label_path)
         else:
             annotation_preprocess_fn(orig_annotation_path, out_label_path)
+
+def rgb2mask(img, color2index):
+    '''
+    Convert a 3-channel color image into a single-channel index mask.
+    Arguments:
+        img: array of shape (height, width, 3); channel order must match the keys of `color2index`.
+        color2index: dictionary. key: tuple containing color values (b, g, r). value: corresponding index.
+    Returns:
+        a (height, width) float array of indices; colors missing from `color2index` stay 0.
+    Source: https://stackoverflow.com/a/62170172
+    '''
+    assert len(img.shape) == 3
+    assert img.shape[2] == 3
+
+    # Pack the three channels of every pixel into one integer id (base 256).
+    W = np.power(256, [[0],[1],[2]])
+    img_id = img.dot(W).squeeze(-1)
+
+    mask = np.zeros(img_id.shape)
+
+    for c in np.unique(img_id):
+        selected = img_id == c
+        try:
+            # All pixels sharing an id share a color, so the first one is representative.
+            mask[selected] = color2index[tuple(img[selected][0])]
+        except KeyError:
+            pass  # unmapped color: deliberately left as 0
+    return mask