forked from mindspore-lab/mindocr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsynthadd.py
36 lines (28 loc) · 1.33 KB
/
synthadd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import re
from tools.dataset_converters.utils.lmdb_writer import create_lmdb_dataset
class SYNTHADD_Converter:
    """Collect image/label pairs from a split folder layout and write them to LMDB.

    The expected layout under ``image_dir`` is::

        image_dir/
            crop_img_1/ ... crop_img_20/          # cropped word images
            annotationlist/gt_1.txt ... gt_20.txt  # one annotation file per folder

    Each non-blank annotation line has the form ``<name>.jpg,"<label>"``; the
    image named on a line of ``gt_<i>.txt`` is looked up in ``crop_img_<i>``.
    """

    # Number of ``crop_img_<i>`` / ``gt_<i>.txt`` pairs; they are numbered from 1.
    _NUM_SPLITS = 20

    # One annotation record with its line terminator already removed.
    # The dot before ``jpg`` is escaped so that only a literal ``.jpg`` suffix
    # is accepted, and ``.+`` is greedy so a label may itself contain quotes.
    _LINE_PATTERN = re.compile(r'(\w+\.jpg),"(.+)"$')

    def __init__(self, *args, **kwargs):
        """Create a converter; positional and keyword arguments are ignored."""
        self._image_dir = None

    def convert(self, task="rec_lmdb", image_dir=None, label_path=None, output_path=None):
        """Run the conversion selected by ``task``.

        Args:
            task: Conversion to run. Only ``"rec_lmdb"`` is supported.
            image_dir: Root directory holding the image folders and the
                ``annotationlist`` directory.
            label_path: Accepted for signature compatibility; not used here.
            output_path: Forwarded unchanged to ``create_lmdb_dataset``.

        Raises:
            ValueError: If ``task`` is not ``"rec_lmdb"``.
        """
        if task == "rec_lmdb":
            self.convert_rec_lmdb(image_dir, output_path)
        else:
            raise ValueError(f"Unsupported task `{task}`.")

    def convert_rec_lmdb(self, image_dir=None, output_path=None):
        """Read every annotation file and hand the pairs to ``create_lmdb_dataset``.

        Args:
            image_dir: Root directory of the dataset (see the class docstring).
            output_path: Forwarded unchanged to ``create_lmdb_dataset``.

        Raises:
            ValueError: If ``image_dir`` is ``None`` or an annotation line does
                not match ``<name>.jpg,"<label>"``.
            OSError: If an annotation file cannot be opened.
        """
        if image_dir is None:
            # Fail with a clear message instead of a TypeError from os.path.join.
            raise ValueError("`image_dir` must be provided.")
        self._image_dir = image_dir

        images, labels = [], []
        for index in range(1, self._NUM_SPLITS + 1):
            folder = os.path.join(image_dir, f"crop_img_{index}")
            anno = os.path.join(image_dir, f"annotationlist/gt_{index}.txt")
            # Explicit encoding keeps decoding independent of the platform locale.
            with open(anno, "r", encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        # Tolerate blank lines (e.g. a trailing empty line).
                        continue
                    image_name, label = self._parse_line(line, anno)
                    images.append(os.path.join(folder, image_name))
                    labels.append(label)

        create_lmdb_dataset(images, labels, output_path)

    @classmethod
    def _parse_line(cls, line, source):
        """Split one annotation line into ``(image_name, label)``.

        ``source`` is only used to make the error message traceable.
        """
        # Strip the terminator first so the final line of a file that lacks a
        # trailing newline is parsed like every other line.
        match = cls._LINE_PATTERN.search(line.rstrip("\n"))
        if match is None:
            # A real exception rather than ``assert`` so the check survives ``-O``.
            raise ValueError(f"Malformed annotation line in `{source}`: {line!r}")
        return match.group(1), match.group(2)