forked from mindspore-lab/mindocr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathccpd.py
189 lines (172 loc) · 5.77 KB
/
ccpd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import json
from pathlib import Path
from tqdm import tqdm
class CCPD_Converter:
    """
    Format annotation to standard form for CCPD2019 and CCPD-Green datasets.

    CCPD2019 is a dataset for license plate (lp) text detection and recognition.
    CCPD2019 train, validation and test splits lists are located under `CCPD/splits` directory. These lists must be
    supplied to the converter with the `label_path` argument. CCPD-Green data is already separated into different
    folders and thus does not require `label_path` argument.

    The ground truths are embedded into the filenames of the images of the dataset, so there are no additional
    annotation file(s). Thus, the filenames follow a specific format:
        `<area ratio>-<tilt>-<bbox coords>-<vertices>-<lp number>-<brightness>-<blurriness>`

    The 'area ratio', 'brightness', and 'blurriness' properties are simple integers.
    The 'tilt' property is split into further two: `<horizontal tilt>_<vertical tilt>`.
    The 'bbox coords' property provides top-left and bottom-right coords, respectively: `<x1>&<y1>_<x2>&<y2>`
    The 'vertices' property provides the points for the polygon: `<x1>&<y1>_<x2>&<y2>_<x3>&<y3>_<x4>&<y4>`
    The 'lp number' property provides the transcription as explained here:
        https://github.com/detectRecog/CCPD#dataset-annotations

    Each image is assumed to have only one license plate (lp). The information about the one lp is embedded
    into the file name of the image.

    Note: the lp number consists of a province as a Chinese character and the remaining characters are English
    alphanumeric characters.

    Args:
        path_mode: when equal to "relative", only the image file name is written to the output label file;
            any other value writes the full image path.
        **kwargs: accepted and ignored.
    """

    def __init__(self, path_mode="relative", **kwargs):
        self._relative = path_mode == "relative"

        # Lookup tables indexed by the integers stored in the `<lp number>` field of the file name.
        # The first index selects from `provinces`, the second from `alphabets`, all remaining from `ads`.
        self.provinces = list("皖沪津渝冀晋蒙辽吉黑苏浙京闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新警学O")
        self.alphabets = list("ABCDEFGHJKLMNPQRSTUVWXYZO")
        self.ads = list("ABCDEFGHJKLMNPQRSTUVWXYZ0123456789O")

    def convert(self, task="det", image_dir=None, label_path=None, output_path=None):
        """
        Convert the file-name-encoded CCPD annotations into a single label file.

        Args:
            task: only "det" is supported.
            image_dir: directory containing the images.
            label_path: optional path to a split list; when falsy, every entry of `image_dir` is converted.
            output_path: path of the label file to write.

        Raises:
            AssertionError: if `label_path` is given but does not exist.
            ValueError: if `task` is not "det", or an image file name cannot be parsed.
        """
        if label_path:
            label_path = Path(label_path)
            assert label_path.exists(), f"{label_path} does not exist!"

        if task == "det":
            self._format_det_label(Path(image_dir), label_path, output_path)
        else:
            raise ValueError("The CCPD dataset currently supports detection only!")

    @staticmethod
    def _listed_names(lines):
        """Return the image file names referenced by the lines of a split list, skipping blank lines."""
        names = []
        for line in lines:
            entry = line.strip()
            if not entry:
                # A trailing newline or an empty line must not abort the conversion.
                continue
            # Entries look like `<folder>/<file name>`; the file name is the second component.
            names.append(entry.split("/")[1])
        return names

    def _parse_stem(self, stem):
        """
        Decode the annotation embedded in an image file name (without extension).

        Returns:
            dict with the keys `area`, `h_tilt`, `v_tilt`, `bbox`, `points`, `transcription`, `brightness`
            and `blurriness`.

        Raises:
            ValueError: if the name does not consist of seven '-'-separated fields or a numeric field is malformed.
        """
        fields = stem.split("-")
        if len(fields) != 7:
            raise ValueError(f"expected 7 '-'-separated fields, got {len(fields)}")
        area, tilt, bbox, vertices, lp, brightness, blurriness = fields

        h_tilt, v_tilt = [int(x) for x in tilt.split("_")]
        bbox = [[int(x) for x in coordinates.split("&")] for coordinates in bbox.split("_")]  # reshape (2, 2)
        points = [
            [int(x) for x in coordinates.split("&")] for coordinates in vertices.split("_")
        ]  # reshape (N, 2)

        lp_indices = [int(i) for i in lp.split("_")]
        province = self.provinces[lp_indices[0]]
        alphabet = self.alphabets[lp_indices[1]]
        ad = "".join(self.ads[i] for i in lp_indices[2:])

        return {
            "area": int(area),
            "h_tilt": h_tilt,
            "v_tilt": v_tilt,
            "bbox": bbox,
            "points": points,
            "transcription": province + alphabet + ad,
            "brightness": int(brightness),
            "blurriness": int(blurriness),
        }

    def _format_det_label(self, image_dir: Path, label_path: Path, output_path: str):
        """
        Write one `<image>\\t<json label>` line per image to `output_path`.

        Images are taken from the split list at `label_path` when it is given, otherwise from the
        (sorted) contents of `image_dir`.
        """
        if label_path:
            with open(label_path, "r", encoding="utf-8") as f:
                img_paths = [image_dir / name for name in self._listed_names(f)]
            for img_path in img_paths:
                assert img_path.exists(), f"Image {img_path} not found."
        else:
            # Directory listing order is arbitrary; sort so the label file is reproducible.
            img_paths = sorted(image_dir.iterdir())

        with open(output_path, "w", encoding="utf-8") as out_file:
            for img_path in tqdm(img_paths):
                try:
                    label = [self._parse_stem(img_path.stem)]
                except ValueError as err:
                    # Name the offending file; the bare parsing error does not say which image is malformed.
                    raise ValueError(
                        f"Cannot parse CCPD annotation from file name '{img_path.name}': {err}"
                    ) from err

                img_ref = img_path.name if self._relative else str(img_path)
                out_file.write(img_ref + "\t" + json.dumps(label, ensure_ascii=False) + "\n")