From d145b8c421efda384dc85817bdd12b5d34da0459 Mon Sep 17 00:00:00 2001 From: Hariom_Nagar Date: Thu, 4 Sep 2025 12:06:17 +0530 Subject: [PATCH] license_plate_detection_yunet: fix NMSBoxes input (use [x,y,w,h] built from 4-point corners); keep original quad dets. Fixes #275 --- .../lpd_yunet.py | 78 ++++++++++++------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/models/license_plate_detection_yunet/lpd_yunet.py b/models/license_plate_detection_yunet/lpd_yunet.py index 917e58a3..21aa5c42 100644 --- a/models/license_plate_detection_yunet/lpd_yunet.py +++ b/models/license_plate_detection_yunet/lpd_yunet.py @@ -1,13 +1,15 @@ from itertools import product - import numpy as np import cv2 as cv + class LPD_YuNet: - def __init__(self, modelPath, inputSize=[320, 240], confThreshold=0.8, nmsThreshold=0.3, topK=5000, keepTopK=750, backendId=0, targetId=0): + def __init__(self, modelPath, inputSize=[320, 240], confThreshold=0.8, + nmsThreshold=0.3, topK=5000, keepTopK=750, + backendId=0, targetId=0): self.model_path = modelPath self.input_size = np.array(inputSize) - self.confidence_threshold=confThreshold + self.confidence_threshold = confThreshold self.nms_threshold = nmsThreshold self.top_k = topK self.keep_top_k = keepTopK @@ -19,12 +21,12 @@ def __init__(self, modelPath, inputSize=[320, 240], confThreshold=0.8, nmsThresh self.steps = [8, 16, 32, 64] self.variance = [0.1, 0.2] - # load model + # Load model self.model = cv.dnn.readNet(self.model_path) - # set backend and target self.model.setPreferableBackend(self.backend_id) self.model.setPreferableTarget(self.target_id) - # generate anchors/priorboxes + + # Generate anchors/priorboxes self._priorGen() @property @@ -39,15 +41,16 @@ def setBackendAndTarget(self, backendId, targetId): def setInputSize(self, inputSize): self.input_size = inputSize - # re-generate anchors/priorboxes self._priorGen() def _preprocess(self, image): return cv.dnn.blobFromImage(image) def infer(self, image): - assert image.shape[0] == self.input_size[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self.input_size[1]) - assert image.shape[1] == self.input_size[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self.input_size[0]) + assert image.shape[0] == self.input_size[1], \ + f"{image.shape[0]} (height of input image) != {self.input_size[1]} (preset height)" + assert image.shape[1] == self.input_size[0], \ + f"{image.shape[1]} (width of input image) != {self.input_size[0]} (preset width)" # Preprocess inputBlob = self._preprocess(image) @@ -58,26 +61,44 @@ def infer(self, image): # Postprocess results = self._postprocess(outputBlob) - return results def _postprocess(self, blob): - # Decode + # Decode outputs dets = self._decode(blob) - # NMS + # dets shape: [x1,y1,x2,y2,x3,y3,x4,y4,score] + pts = dets[:, :-1].reshape(-1, 8) # N x 8 corners + scores = dets[:, -1].astype(float).tolist() + + # Convert corners → [x,y,w,h] for NMS + bboxes = [] + for p in pts: + xs = p[0::2] + ys = p[1::2] + x_min, y_min = float(xs.min()), float(ys.min()) + w, h = float(xs.max() - x_min), float(ys.max() - y_min) + bboxes.append([x_min, y_min, w, h]) + keepIdx = cv.dnn.NMSBoxes( - bboxes=dets[:, 0:4].tolist(), - scores=dets[:, -1].tolist(), + bboxes=bboxes, + scores=scores, score_threshold=self.confidence_threshold, nms_threshold=self.nms_threshold, top_k=self.top_k - ) # box_num x class_num - if len(keepIdx) > 0: - dets = dets[keepIdx] - return dets[:self.keep_top_k] - else: - return np.empty(shape=(0, 9)) + ) + + # Normalize keepIdx across OpenCV versions + if isinstance(keepIdx, tuple): + keepIdx = keepIdx[0] + if len(keepIdx) == 0: + return np.empty((0, dets.shape[1]), dtype=dets.dtype) + + keepIdx = np.array(keepIdx).reshape(-1) + + # Keep original quadrilateral detections + dets = dets[keepIdx] + return dets[:self.keep_top_k] def _priorGen(self): w, h = self.input_size @@ -98,36 +119,33 @@ def _priorGen(self): priors = [] for k, f in enumerate(feature_maps): min_sizes = self.min_sizes[k] - for i, j in product(range(f[0]), range(f[1])): # i->h, j->w + for i, j in product(range(f[0]), range(f[1])): # i→h, j→w for min_size in min_sizes: s_kx = min_size / w s_ky = min_size / h - cx = (j + 0.5) * self.steps[k] / w cy = (i + 0.5) * self.steps[k] / h - priors.append([cx, cy, s_kx, s_ky]) self.priors = np.array(priors, dtype=np.float32) def _decode(self, blob): loc, conf, iou = blob + # get score cls_scores = conf[:, 1] iou_scores = iou[:, 0] + # clamp - _idx = np.where(iou_scores < 0.) - iou_scores[_idx] = 0. - _idx = np.where(iou_scores > 1.) - iou_scores[_idx] = 1. + iou_scores = np.clip(iou_scores, 0., 1.) scores = np.sqrt(cls_scores * iou_scores) scores = scores[:, np.newaxis] scale = self.input_size - # get four corner points for bounding box + # get four corner points bboxes = np.hstack(( - (self.priors[:, 0:2] + loc[:, 4: 6] * self.variance[0] * self.priors[:, 2:4]) * scale, - (self.priors[:, 0:2] + loc[:, 6: 8] * self.variance[0] * self.priors[:, 2:4]) * scale, + (self.priors[:, 0:2] + loc[:, 4:6] * self.variance[0] * self.priors[:, 2:4]) * scale, + (self.priors[:, 0:2] + loc[:, 6:8] * self.variance[0] * self.priors[:, 2:4]) * scale, (self.priors[:, 0:2] + loc[:, 10:12] * self.variance[0] * self.priors[:, 2:4]) * scale, (self.priors[:, 0:2] + loc[:, 12:14] * self.variance[0] * self.priors[:, 2:4]) * scale ))