ngr-francesco
diff --git a/core/dataset.py b/core/dataset.py
+3-5
diff --git a/core/utils.py b/core/utils.py
+13-71
diff --git a/core/yolov4.py b/core/yolov4.py
+23
diff --git a/detect.py b/detect.py
+2-5
diff --git a/detectvideo.py b/detectvideo.py
+46-67
@@ -13,7 +13,9 @@
 class Dataset(object):
     """implement Dataset here"""
 
-    def __init__(self, is_training: bool, dataset_type: str = "converted_coco", tiny: bool = False):
+    def __init__(self, FLAGS, is_training: bool, dataset_type: str = "converted_coco"):
+        self.tiny = FLAGS.tiny
+        self.strides, self.anchors, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
         self.dataset_type = dataset_type
 
         self.annot_path = (
@@ -28,12 +30,8 @@ def __init__(self, is_training: bool, dataset_type: str = "converted_coco", tiny
         self.data_aug = cfg.TRAIN.DATA_AUG if is_training else cfg.TEST.DATA_AUG
 
         self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
-        self.strides = (
-            np.array(cfg.YOLO.STRIDES_TINY) if tiny else np.array(cfg.YOLO.STRIDES)
-        )
         self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
         self.num_classes = len(self.classes)
-        self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
         self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
         self.max_bbox_per_scale = 150
 
 
@@ -5,6 +5,19 @@
 import tensorflow as tf
 from core.config import cfg
 
+def load_freeze_layer(model='yolov4', tiny=False):
+    """Return the conv layer names selected by the model / tiny combination."""
+    # Keyed by (model == 'yolov3', tiny is truthy); every model other than
+    # 'yolov3' maps to the False rows, mirroring an if/else fall-through.
+    layer_names = {
+        (True, True): ['conv2d_9', 'conv2d_12'],
+        (False, True): ['conv2d_17', 'conv2d_20'],
+        (True, False): ['conv2d_58', 'conv2d_66', 'conv2d_74'],
+        (False, False): ['conv2d_93', 'conv2d_101', 'conv2d_109'],
+    }
+    freeze_layouts = layer_names[(bool(model == 'yolov3'), bool(tiny))]
+    return freeze_layouts
+
 def load_weights(model, weights_file, model_name='yolov4', is_tiny=False):
     if is_tiny:
         if model_name == 'yolov3':
@@ -89,7 +102,6 @@ def get_anchors(anchors_path, tiny=False):
     else:
         return anchors.reshape(3, 3, 2)
 
-
 def image_preprocess(image, target_size, gt_boxes=None):
 
     ih, iw    = target_size
@@ -112,7 +124,6 @@ def image_preprocess(image, target_size, gt_boxes=None):
         gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
         return image_paded, gt_boxes
 
-
 def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
     num_classes = len(classes)
     image_h, image_w, _ = image.shape
@@ -149,10 +160,8 @@ def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_la
 
             cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
                         fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
-
     return image
 
-
 def bbox_iou(bboxes1, bboxes2):
     """
     @param bboxes1: (a, b, ..., 4)
@@ -316,7 +325,6 @@ def bbox_ciou(bboxes1, bboxes2):
 
     return ciou
 
-
 def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
     """
     :param bboxes: (xmin, ymin, xmax, ymax, score, class)
@@ -354,72 +362,6 @@ def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
 
     return best_bboxes
 
-def diounms_sort(bboxes, iou_threshold, sigma=0.3, method='nms', beta_nms=0.6):
-    best_bboxes = []
-    return best_bboxes
-def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1,1,1]):
-    for i, pred in enumerate(pred_bbox):
-        conv_shape = pred.shape
-        output_size = conv_shape[1]
-        conv_raw_dxdy = pred[:, :, :, :, 0:2]
-        conv_raw_dwdh = pred[:, :, :, :, 2:4]
-        xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size))
-        xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2)  # [gx, gy, 1, 2]
-
-        xy_grid = np.tile(tf.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
-        xy_grid = xy_grid.astype(np.float)
-
-        # pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
-        pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
-        # pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]
-        pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i])
-        pred[:, :, :, :, 0:4] = tf.concat([pred_xy, pred_wh], axis=-1)
-
-    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
-    pred_bbox = tf.concat(pred_bbox, axis=0)
-    return pred_bbox
-def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
-
-    valid_scale=[0, np.inf]
-    pred_bbox = np.array(pred_bbox)
-
-    pred_xywh = pred_bbox[:, 0:4]
-    pred_conf = pred_bbox[:, 4]
-    pred_prob = pred_bbox[:, 5:]
-
-    # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
-    pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
-                                pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
-    # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
-    org_h, org_w = org_img_shape
-    resize_ratio = min(input_size / org_w, input_size / org_h)
-
-    dw = (input_size - resize_ratio * org_w) / 2
-    dh = (input_size - resize_ratio * org_h) / 2
-
-    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
-    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
-
-    # # (3) clip some boxes those are out of range
-    pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
-                                np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
-    invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
-    pred_coor[invalid_mask] = 0
-
-    # # (4) discard some invalid boxes
-    bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
-    scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
-
-    # # (5) discard some boxes with low scores
-    classes = np.argmax(pred_prob, axis=-1)
-    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
-    # scores = pred_prob[np.arange(len(pred_coor)), classes]
-    score_mask = scores > score_threshold
-    mask = np.logical_and(scale_mask, score_mask)
-    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
-
-    return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
-
 def freeze_all(model, frozen=True):
     model.trainable = not frozen
     if isinstance(model, tf.keras.Model):
 
@@ -168,6 +168,29 @@ def decode(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE=[1,
     else:
         return decode_tf(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=i, XYSCALE=XYSCALE)
 
+def decode_train(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=0, XYSCALE=[1, 1, 1]):
+    """Decode raw head output `conv_output` into concatenated [xy, wh, conf, prob].
+
+    The output is reshaped to (batch, output_size, output_size, 3, 5 + NUM_CLASS);
+    `i` selects the entry of STRIDES, ANCHORS and XYSCALE to apply.
+    """
+    head_shape = (tf.shape(conv_output)[0], output_size, output_size, 3, 5 + NUM_CLASS)
+    conv_output = tf.reshape(conv_output, head_shape)
+    raw_xy, raw_wh, raw_conf, raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS), axis=-1)
+
+    # Grid of cell indices, tiled across the batch and the 3 slots of axis 3.
+    grid = tf.stack(tf.meshgrid(tf.range(output_size), tf.range(output_size)), axis=-1)
+    grid = tf.expand_dims(tf.expand_dims(grid, axis=2), axis=0)  # [1, gx, gy, 1, 2]
+    grid = tf.cast(tf.tile(grid, [tf.shape(conv_output)[0], 1, 1, 3, 1]), tf.float32)
+
+    # Scaled sigmoid offset, shifted by 0.5 * (scale - 1), plus cell index, times stride.
+    scaled_xy = tf.sigmoid(raw_xy) * XYSCALE[i]
+    pred_xy = (scaled_xy - 0.5 * (XYSCALE[i] - 1) + grid) * STRIDES[i]
+    pred_wh = tf.exp(raw_wh) * ANCHORS[i]
+
+    boxes_and_scores = [pred_xy, pred_wh, tf.sigmoid(raw_conf), tf.sigmoid(raw_prob)]
+    return tf.concat(boxes_and_scores, axis=-1)
+
 def decode_tf(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=0, XYSCALE=[1, 1, 1]):
     conv_output = tf.reshape(conv_output,
                              (tf.shape(conv_output)[0], output_size, output_size, 3, 5 + NUM_CLASS))
 
@@ -1,5 +1,3 @@
-import time
-
 import tensorflow as tf
 physical_devices = tf.config.experimental.list_physical_devices('GPU')
 if len(physical_devices) > 0:
@@ -10,17 +8,16 @@
 from core.yolov4 import filter_boxes
 from tensorflow.python.saved_model import tag_constants
 from PIL import Image
-from core.config import cfg
 import cv2
 import numpy as np
 from tensorflow.compat.v1 import ConfigProto
 from tensorflow.compat.v1 import InteractiveSession
 
 flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt')
-flags.DEFINE_string('weights', './checkpoints/yolov4-416',
+flags.DEFINE_string('weights', './checkpoints/yolov4-tiny-416',
                     'path to weights file')
 flags.DEFINE_integer('size', 416, 'resize images to')
-flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
+flags.DEFINE_boolean('tiny', True, 'yolo or yolo-tiny')
 flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
 flags.DEFINE_string('image', './data/kite.jpg', 'path to input image')
 flags.DEFINE_string('output', 'result.png', 'path to output image')
 
@@ -1,82 +1,50 @@
 import time
+import tensorflow as tf
+physical_devices = tf.config.experimental.list_physical_devices('GPU')
+if len(physical_devices) > 0:
+    tf.config.experimental.set_memory_growth(physical_devices[0], True)
 from absl import app, flags, logging
 from absl.flags import FLAGS
 import core.utils as utils
-from core.yolov4 import YOLOv4, YOLOv3, YOLOv3_tiny, decode
+from core.yolov4 import filter_boxes
+from tensorflow.python.saved_model import tag_constants
 from PIL import Image
-from core.config import cfg
 import cv2
 import numpy as np
-import tensorflow as tf
+from tensorflow.compat.v1 import ConfigProto
+from tensorflow.compat.v1 import InteractiveSession
 
-flags.DEFINE_string('framework', 'tf', '(tf, tflite')
-flags.DEFINE_string('weights', './data/yolov4.weights',
+flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt')
+flags.DEFINE_string('weights', './checkpoints/yolov4-416',
                     'path to weights file')
-flags.DEFINE_integer('size', 608, 'resize images to')
+flags.DEFINE_integer('size', 416, 'resize images to')
 flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
 flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
-flags.DEFINE_string('video', './data/road.avi', 'path to input video')
+flags.DEFINE_string('video', './data/road.mp4', 'path to input video')
+flags.DEFINE_float('iou', 0.45, 'iou threshold')
+flags.DEFINE_float('score', 0.25, 'score threshold')
 
 def main(_argv):
-    if FLAGS.tiny:
-        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
-        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
-    else:
-        STRIDES = np.array(cfg.YOLO.STRIDES)
-        if FLAGS.model == 'yolov4':
-            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
-        else:
-            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
-    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
-    XYSCALE = cfg.YOLO.XYSCALE
+    config = ConfigProto()
+    config.gpu_options.allow_growth = True
+    session = InteractiveSession(config=config)
+    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
     input_size = FLAGS.size
     video_path = FLAGS.video
 
     print("Video from: ", video_path )
     vid = cv2.VideoCapture(video_path)
 
-    if FLAGS.framework == 'tf':
-        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
-        if FLAGS.tiny:
-            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
-            bbox_tensors = []
-            for i, fm in enumerate(feature_maps):
-                bbox_tensor = decode(fm, NUM_CLASS, i)
-                bbox_tensors.append(bbox_tensor)
-            model = tf.keras.Model(input_layer, bbox_tensors)
-            utils.load_weights_tiny(model, FLAGS.weights)
-        else:
-            if FLAGS.model == 'yolov3':
-                feature_maps = YOLOv3(input_layer, NUM_CLASS)
-                bbox_tensors = []
-                for i, fm in enumerate(feature_maps):
-                    bbox_tensor = decode(fm, NUM_CLASS, i)
-                    bbox_tensors.append(bbox_tensor)
-                model = tf.keras.Model(input_layer, bbox_tensors)
-                utils.load_weights_v3(model, FLAGS.weights)
-            elif FLAGS.model == 'yolov4':
-                feature_maps = YOLOv4(input_layer, NUM_CLASS)
-                bbox_tensors = []
-                for i, fm in enumerate(feature_maps):
-                    bbox_tensor = decode(fm, NUM_CLASS, i)
-                    bbox_tensors.append(bbox_tensor)
-                model = tf.keras.Model(input_layer, bbox_tensors)
-                
-                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) - 1] == "weights":
-                    utils.load_weights(model, FLAGS.weights)
-                else:
-                    model.load_weights(FLAGS.weights).expect_partial()
-
-        model.summary()
-    else:
-        # Load TFLite model and allocate tensors.
+    if FLAGS.framework == 'tflite':
         interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
         interpreter.allocate_tensors()
-        # Get input and output tensors.
         input_details = interpreter.get_input_details()
         output_details = interpreter.get_output_details()
         print(input_details)
         print(output_details)
+    else:
+        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
+        infer = saved_model_loaded.signatures['serving_default']
 
     while True:
         return_value, frame = vid.read()
@@ -86,26 +54,37 @@ def main(_argv):
         else:
             raise ValueError("No image! Try with another video format")
         frame_size = frame.shape[:2]
-        image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size])
+        image_data = cv2.resize(frame, (input_size, input_size))
+        image_data = image_data / 255.
         image_data = image_data[np.newaxis, ...].astype(np.float32)
         prev_time = time.time()
 
-        if FLAGS.framework == 'tf':
-            pred_bbox = model.predict(image_data)
-        else:
+        if FLAGS.framework == 'tflite':
             interpreter.set_tensor(input_details[0]['index'], image_data)
             interpreter.invoke()
-            pred_bbox = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
-
-        if FLAGS.model == 'yolov4':
-            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
+            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
+            if FLAGS.model == 'yolov4' and FLAGS.tiny == True:
+                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25)
+            else:
+                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25)
         else:
-            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
-
-        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25)
-        bboxes = utils.nms(bboxes, 0.213, method='nms')
+            batch_data = tf.constant(image_data)
+            pred_bbox = infer(batch_data)
+            for key, value in pred_bbox.items():
+                boxes = value[:, :, 0:4]
+                pred_conf = value[:, :, 4:]
 
-        image = utils.draw_bbox(frame, bboxes)
+        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
+            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
+            scores=tf.reshape(
+                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
+            max_output_size_per_class=50,
+            max_total_size=50,
+            iou_threshold=FLAGS.iou,
+            score_threshold=FLAGS.score
+        )
+        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
+        image = utils.draw_bbox(frame, pred_bbox)
         curr_time = time.time()
         exec_time = curr_time - prev_time
         result = np.asarray(image)