-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_detection_app.py
172 lines (146 loc) · 6.84 KB
/
text_detection_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import cv2
import os
import numpy as np
from tkinter import Label, Frame, Button, Tk, LEFT
from PIL import ImageTk, Image
import decodeBox as cv2DecodeBoxes
from tkinter import filedialog as fd
class App:
    """Main window of the scene text detection application.

    The class builds the Tk window and its widgets and handles the button
    events: loading an image, and running the EAST detector to show either the
    thresholded score map, all decoded bounding boxes, or the boxes that
    survive non-maximum suppression.
    """

    # Detection settings.
    CONF_THRESHOLD = 0.5
    NMS_THRESHOLD = 0.4
    INPUT_WIDTH = 960
    INPUT_HEIGHT = 960
    BBOX_LINE_WIDTH = 2
    BBOX_COLOR = (0, 255, 0)
    # Per-channel mean handed to cv2.dnn.blobFromImage.
    BLOB_MEAN = (123.68, 116.78, 103.94)
    # Model file (resolved against the current working directory) and the
    # names of the two output layers: scores first, geometry second.
    MODEL_PATH = "EAST.pb"
    OUTPUT_NAMES = (
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3",
    )
    # File name of the working copy of the currently loaded image.
    TEMP_IMAGE_NAME = "temp.jpg"
    # Largest size at which an image is shown in the window.
    PREVIEW_MAX_WIDTH = 1900
    PREVIEW_MAX_HEIGHT = 800

    def __init__(self):
        """Create the window, its widgets and lay them out."""
        # Path to the temp_image folder and the start image shown in the GUI.
        working_dir = os.getcwd()
        self.temp_im_path = working_dir + "/temp_image/"
        self.preview_image = Image.open(working_dir + "/example_images/start.jpg")
        # The detection network is loaded lazily on first use and then reused.
        self._detection_model = None

        # Create window, set title, set geometry.
        self.root = Tk()
        self.root.title("Scene Text Detection")
        self.root.geometry("1920x1080")

        # Create widgets; the layout is split into an upper and a lower frame.
        self.upper_frame = Frame(self.root, width=1920, height=880)
        self.lower_frame = Frame(self.root, width=1920, height=200)
        self.start_im = ImageTk.PhotoImage(self.preview_image)
        self.image = Label(self.upper_frame, image=self.start_im)
        # Keep a reference on the widget so the photo image is not garbage
        # collected while it is displayed.
        self.image.image = self.start_im
        self.btn_load = self._make_button("Load Image", self.select_file)
        self.btn_nms = self._make_button(
            "NMS", lambda: self.detect_text(False, True))
        self.btn_bounding = self._make_button(
            "Bounding Boxes", lambda: self.detect_text(False, False))
        self.btn_score = self._make_button(
            "Score Map", lambda: self.detect_text(True, False))

        # Use the pack() layout manager, which centers everything.
        self.upper_frame.pack()
        self.lower_frame.pack()
        self.image.pack()
        for button in (self.btn_load, self.btn_score,
                       self.btn_bounding, self.btn_nms):
            button.pack(side=LEFT, padx=50, pady=25)

    def _make_button(self, text, command):
        """Return a button in the lower frame with the shared look."""
        return Button(self.lower_frame, text=text, command=command,
                      width=10, height=5, bg="White", fg="Black")

    def _show_error(self, message):
        """Report a problem to the user in a modal error dialog."""
        # Imported here so the module's import block stays untouched.
        from tkinter import messagebox
        messagebox.showerror("Scene Text Detection", message)

    def _temp_image_file(self):
        """Return the full path of the temp image file."""
        return os.path.join(self.temp_im_path, self.TEMP_IMAGE_NAME)

    def _get_detection_model(self):
        """Return the detection network, loading it on first use.

        Loading the model once and reusing it avoids re-reading the model file
        on every button click.
        """
        model = getattr(self, "_detection_model", None)
        if model is None:
            model = cv2.dnn.readNet(self.MODEL_PATH)
            self._detection_model = model
        return model

    @staticmethod
    def _fit_within(width, height, max_width, max_height):
        """Return ``(new_width, new_height)`` fitting inside the given bounds.

        The aspect ratio is preserved, the image is never enlarged, and both
        results are at least 1 so that they are always a valid resize target.

        Raises:
            ValueError: if ``width`` or ``height`` is not positive.
        """
        if width <= 0 or height <= 0:
            raise ValueError("image dimensions must be positive")
        # Cross-multiplied form of max_width / width < max_height / height:
        # true when the width is the dimension that limits the scale.
        if width * max_height > height * max_width:
            new_width = min(width, max_width)
            new_height = new_width * height // width
        else:
            new_height = min(height, max_height)
            new_width = new_height * width // height
        return max(1, int(new_width)), max(1, int(new_height))

    def downsize_image(self, image, max_width, max_height):
        """Downsize ``image`` to fit ``max_width`` x ``max_height``.

        Works for both 2-D (single channel) and 3-D (multi channel) arrays and
        preserves the aspect ratio. Returns the resized image.
        """
        height, width = image.shape[:2]
        new_size = self._fit_within(width, height, max_width, max_height)
        # cv2.resize expects the target size as (width, height).
        return cv2.resize(image, new_size)

    def select_file(self):
        """Let the user pick an image, store it as temp image and display it.

        Does nothing when the dialog is cancelled.
        """
        filetypes = (
            ('jpg files', '*.jpg'),
            ('png files', '*.png'),
        )
        image_path = fd.askopenfilename(
            title='Open an image',
            initialdir=os.getcwd() + "/example_images/",
            filetypes=filetypes)
        if not image_path:
            # The dialog returns an empty value when the user cancels.
            return
        image = self.set_temp_image(image_path)
        if image is not None:
            self.update_image(image)

    def update_image(self, image):
        """Show ``image`` (an OpenCV array) in the GUI window."""
        downsized_image = self.downsize_image(
            image, self.PREVIEW_MAX_WIDTH, self.PREVIEW_MAX_HEIGHT)
        self.preview_image = ImageTk.PhotoImage(self.cv_to_pil(downsized_image))
        self.image.config(image=self.preview_image)
        # Keep a reference on the widget so the photo image stays alive.
        self.image.image = self.preview_image

    def set_temp_image(self, im_path):
        """Save a copy of the image at ``im_path`` in the temp_image folder.

        Returns the loaded image, or ``None`` (after reporting the problem to
        the user) when the file cannot be read or the copy cannot be written.
        """
        image = cv2.imread(im_path)
        if image is None:
            self._show_error("Could not read image file:\n{}".format(im_path))
            return None
        # cv2.imwrite does not create missing folders.
        os.makedirs(self.temp_im_path, exist_ok=True)
        if not cv2.imwrite(self._temp_image_file(), image):
            self._show_error(
                "Could not write temp image to:\n{}".format(self.temp_im_path))
            return None
        return image

    def cv_to_pil(self, im):
        """Convert an OpenCV image to a PIL image.

        Colour images are converted from BGR to RGB. Single channel images
        (such as the score map) are passed through unchanged, because the
        BGR-to-RGB conversion rejects single channel input.
        """
        if im.ndim == 2:
            return Image.fromarray(im)
        image_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        return Image.fromarray(image_rgb)

    def detect_text(self, score_flag, nms):
        """Run the detector on the temp image and display the result.

        Args:
            score_flag: when true, display the thresholded score map instead
                of bounding boxes.
            nms: when true (and ``score_flag`` is false), apply non-maximum
                suppression before drawing the boxes; otherwise draw all
                decoded boxes.
        """
        # Open the current temp image.
        img = cv2.imread(self._temp_image_file())
        if img is None:
            self._show_error("No image loaded. Please load an image first.")
            return

        # Ratios to scale coordinates from network input size to image size.
        height_, width_ = img.shape[:2]
        ratio_w = width_ / float(self.INPUT_WIDTH)
        ratio_h = height_ / float(self.INPUT_HEIGHT)

        # Create a blob from the image and run the model.
        detection_model = self._get_detection_model()
        blob = cv2.dnn.blobFromImage(
            img, 1.0, (self.INPUT_WIDTH, self.INPUT_HEIGHT),
            self.BLOB_MEAN, swapRB=True, crop=False)
        detection_model.setInput(blob)
        model_output = detection_model.forward(list(self.OUTPUT_NAMES))
        scores = model_output[0]
        geometry = model_output[1]

        if score_flag:
            # Create the score map image: white where the score exceeds the
            # confidence threshold, black elsewhere.
            score_map = np.array(scores[0][0])
            score_map_im = np.where(
                score_map > self.CONF_THRESHOLD, 255, 0).astype(np.uint8)
            score_map_im = cv2.resize(score_map_im, (width_, height_))
            self.update_image(score_map_im)
            return

        # Decode bounding boxes to a readable OpenCV format.
        boxes, confidences = cv2DecodeBoxes.decodeBoundingBoxes(
            scores, geometry, self.CONF_THRESHOLD)
        if nms and len(boxes) > 0:
            kept = cv2.dnn.NMSBoxesRotated(
                boxes, confidences, self.CONF_THRESHOLD, self.NMS_THRESHOLD)
            # Older OpenCV versions return an (N, 1) array; flatten so each
            # entry can be used directly as a list index.
            indices = np.array(kept, dtype=int).reshape(-1)
        else:
            # Take all boxes.
            indices = range(len(boxes))

        scale = np.array([ratio_w, ratio_h], dtype=np.float32)
        for i in indices:
            # Get the 4 corners of the rotated rect and scale them back to
            # the coordinates of the original image.
            corner_points = cv2.boxPoints(boxes[i]) * scale
            # Draw the closed outline of the bounding box.
            cv2.polylines(
                img, [corner_points.astype(np.int32)], True,
                self.BBOX_COLOR, thickness=self.BBOX_LINE_WIDTH)
        self.update_image(img)
def main():
    """Build the application window and run the Tk event loop until it closes."""
    application = App()
    application.root.mainloop()


if __name__ == "__main__":
    main()