#!/usr/bin/env bash
#
# convert_tifinagh_mnist_to_doctr_format.sh
# Script to convert the Tifinagh-MNIST repo dataset to the Doctr library format
# Usage: ./convert_tifinagh_mnist_to_doctr_format.sh <tifinagh-mnist-repo-dir> <train-dir> <test-dir>
: '
Tifinagh-MNIST format:
Tifinagh-MNIST
├── Dataset
│   ├── test_data
│   │   ├── 0
│   │   │   ├── <filename>.png
│   │   │   └── ...
│   │   ├── 1
│   │   ├── 2
│   │   ├── ...
│   │   └── 32
│   └── train_data
│       ├── 0
│       ├── 1
│       ├── 2
│       ├── ...
│       └── 32
└── ...
'
: '
Doctr format:
├── images
│   ├── img_1.png
│   ├── img_2.png
│   ├── img_3.png
│   └── ...
└── labels.json
'
: '
labels.json format:
{
"img_1.png": "ⴰ",
"img_2.png": "ⴱ",
"img_3.png": "ⴳ",
"img_4.png": "ⴷ",
"img_5.png": "ⴽⵯ",
...
}
'
# Validate the command line before anything on disk is touched.
#   $1 - existing checkout of the Tifinagh-MNIST repository
#   $2 - train output directory (must not exist yet)
#   $3 - test output directory (must not exist yet)
# Every expansion is quoted and tested with [[ ]] so that a path containing
# whitespace, or an empty argument, is evaluated as one operand (with the
# unquoted form an empty $1 turns the test into `[ ! -d ]`, which is false and
# lets the script continue). Diagnostics go to stderr; each failed check exits
# with status 1.

# Check if the number of arguments is correct
if [[ $# -ne 3 ]]; then
  echo "Usage: ./convert_tifinagh_mnist_to_doctr_format.sh <tifinagh-mnist-repo-dir> <train-dir> <test-dir>" >&2
  exit 1
fi

# Check if the Tifinagh-MNIST repo directory exists
if [[ ! -d "$1" ]]; then
  echo "Directory $1 does not exist" >&2
  exit 1
fi

# Check if the train directory already exists
if [[ -d "$2" ]]; then
  echo "Directory $2 already exists" >&2
  exit 1
fi

# Check if the test directory already exists
if [[ -d "$3" ]]; then
  echo "Directory $3 already exists" >&2
  exit 1
fi

# Both splits get their own images/ directory and labels.json, so pointing
# them at the same path would mix the splits and clobber one label file.
if [[ "$2" == "$3" ]]; then
  echo "Train and test directories must be different" >&2
  exit 1
fi
# Make sure the filenames are unique across classes by appending the class
# number to every image name: <split>/<class>/<name>.png becomes
# <split>/<class>/<name>-<class>.png. The rename happens in place inside the
# dataset directory.
#
# Arguments: $1 - split directory holding the class sub-directories 0..32
# Returns:   0 if every rename succeeded, 1 if any mv failed
add_class_suffix() {
  local split_dir=$1
  local class_id image stem
  local status=0

  for class_id in {0..32}; do
    for image in "$split_dir/$class_id"/*.png; do
      # An unmatched glob is left as the literal pattern; skip it instead of
      # handing a non-existent path to mv.
      [[ -e "$image" ]] || continue

      # Strip the directory part and the extension with parameter expansion;
      # unlike an unquoted basename call this is safe for names containing
      # whitespace or starting with a dash.
      stem=${image##*/}
      stem=${stem%.*}

      mv -- "$image" "$split_dir/$class_id/$stem-$class_id.png" || status=1
    done
  done

  return "$status"
}

# Stop before any output is generated if a rename failed, since the later
# steps rely on the names being unique.
add_class_suffix "$1/Dataset/train_data" || exit 1
add_class_suffix "$1/Dataset/test_data" || exit 1
# Create the output directories and write one labels.json per split.

# Map a Tifinagh-MNIST class number (0..32) to its Tifinagh label.
# Arguments: $1 - class number
# Outputs:   the label on stdout, without a trailing newline
# Returns:   0 for a known class, 1 otherwise (nothing is printed)
tifinagh_label() {
  case "$1" in
    0) printf '%s' "ⴰ" ;;
    1) printf '%s' "ⴱ" ;;
    2) printf '%s' "ⵛ" ;;
    3) printf '%s' "ⴷ" ;;
    4) printf '%s' "ⴻ" ;;
    5) printf '%s' "ⴼ" ;;
    6) printf '%s' "ⴳ" ;;
    7) printf '%s' "ⵀ" ;;
    8) printf '%s' "ⵉ" ;;
    9) printf '%s' "ⵊ" ;;
    10) printf '%s' "ⴽ" ;;
    11) printf '%s' "ⵍ" ;;
    12) printf '%s' "ⵎ" ;;
    13) printf '%s' "ⵏ" ;;
    14) printf '%s' "ⵇ" ;;
    15) printf '%s' "ⵔ" ;;
    16) printf '%s' "ⵙ" ;;
    17) printf '%s' "ⵜ" ;;
    18) printf '%s' "ⵓ" ;;
    19) printf '%s' "ⵡ" ;;
    20) printf '%s' "ⵅ" ;;
    21) printf '%s' "ⵢ" ;;
    22) printf '%s' "ⵣ" ;;
    23) printf '%s' "ⵃ" ;;
    24) printf '%s' "ⵚ" ;;
    25) printf '%s' "ⴹ" ;;
    26) printf '%s' "ⵟ" ;;
    27) printf '%s' "ⵄ" ;;
    28) printf '%s' "ⵖ" ;;
    29) printf '%s' "ⵥ" ;;
    30) printf '%s' "ⴳⵯ" ;;
    31) printf '%s' "ⴽⵯ" ;;
    32) printf '%s' "ⵕ" ;;
    *) return 1 ;;
  esac
}

# Create <out>/images and write <out>/labels.json, a JSON object mapping every
# PNG file name found under <split>/<class>/ to the label of that class.
#
# The separator is written *before* each entry except the first, so there is
# never a trailing comma to strip afterwards; a split with no images yields
# the valid empty object instead of a file whose opening brace was removed.
#
# Arguments: $1 - split directory holding the class sub-directories 0..32
#            $2 - output directory for this split
# Returns:   non-zero if the directory or the file could not be created
write_labels_json() {
  local split_dir=$1
  local out_dir=$2
  local class_id label image name
  local separator=''

  mkdir -p -- "$out_dir/images" || return 1

  {
    printf '{'
    for class_id in {0..32}; do
      label=$(tifinagh_label "$class_id") || continue
      for image in "$split_dir/$class_id"/*.png; do
        # An unmatched glob stays literal; do not emit a "*.png" key for it.
        [[ -e "$image" ]] || continue

        name=${image##*/}
        # Escape the two characters that would terminate or corrupt a JSON
        # string: backslash first, then double quote.
        name=${name//\\/\\\\}
        name=${name//\"/\\\"}

        printf '%s\n "%s": "%s"' "$separator" "$name" "$label"
        separator=','
      done
    done
    printf '\n}\n'
  } > "$out_dir/labels.json"
}

write_labels_json "$1/Dataset/train_data" "$2"
write_labels_json "$1/Dataset/test_data" "$3"
# Move the images to the train and test directories.
#
# Moves every <split>/<class>/*.png (classes 0..32) into the destination.
# Arguments: $1 - split directory holding the class sub-directories
#            $2 - destination directory; it must already exist
# Returns:   0 on success, 1 as soon as one mv fails
move_class_images() {
  local split_dir=$1
  local dest_dir=$2
  local class_id
  local -a images

  for class_id in {0..32}; do
    images=("$split_dir/$class_id"/*.png)
    # An unmatched glob leaves the literal pattern as the only element; skip
    # the class instead of running mv on a path that does not exist.
    [[ -e "${images[0]}" || -L "${images[0]}" ]] || continue

    # The trailing slash makes mv fail when the destination is not a
    # directory, rather than renaming a single image to that path.
    mv -- "${images[@]}" "$dest_dir/" || return 1
  done

  return 0
}

move_class_images "$1/Dataset/train_data" "$2/images" || exit 1
move_class_images "$1/Dataset/test_data" "$3/images" || exit 1
# Tell the user the conversion has finished, then end with a success status.
printf '%s\n' "Done"
exit 0