-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
120 lines (105 loc) · 4.33 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import re
def en_to_ar_num(number_string):
lis = []
dic = {
'0': '٠',
'1': '١',
'2': '٢',
'3': '٣',
'4': '٤',
'5': '٥',
'6': '٦',
'7': '٧',
'8': '٨',
'9': '٩',
}
for char in number_string:
if char in dic:
lis.append(dic[char])
else:
lis.append(char)
return "".join(lis)
def read(name):
print("reading")
f = open(name, encoding="utf8")
return f.read()
def write(array, name):
print('writing')
with open(name, "w", encoding="utf8") as f:
output = ""
output += "[\n"
surahId = -1
surahName = ""
verseId = 0
for i in range(len(array)):
if "#" in array[i]:
verse = array[i].strip()
surahName = verse.split("#")[0]
surahId += 1
verseId = 0
if surahId != 0:
# close previous verses array
output += "\t\t]"
output += "\n\t},\n"
output += "\t{\n"
output += "\t\t" + "\"id\": " + str(surahId) + ","
output += "\n\t\t" + \
"\"name\": {\n\t\t\t\"ar\": " + "\"" + \
surahName.strip() + "\"\n\t\t},"
# open verses array
output += "\n\t\t\"verses\": [\n"
output += "\t\t\t{\n"
output += "\t\t\t\t" + "\"id\": " + str(verseId) + ","
if verseId == 0 and surahId == 0:
# add basmalah as first verse to fatihah only
output += "\n\t\t\t\t" + \
"\"verse\": {\n\t\t\t\t\t\"ar\": " + "\"" + \
"بِسۡمِ ٱللَّهِ ٱلرَّحۡمَٰنِ ٱلرَّحِيمِ" + "\"\n\t\t\t\t}"
elif verseId == 0:
verse = array[i].strip().split("#")
output += "\n\t\t\t\t" + \
"\"verse\": {\n\t\t\t\t\t\"ar\": " + "\"" + \
verse[1].strip() + "\"\n\t\t\t\t}"
else:
output += "\n\t\t\t\t" + \
"\"verse\": {\n\t\t\t\t\t\"ar\": " + "\"" + \
array[i].strip() + "\"\n\t\t\t\t}"
output += "\n\t\t\t},\n"
verseId += 1
output += "\t\t],\n"
output += "\t},\n"
output += "]"
output = output.replace("},\n\t\t]", "}\n\t\t]")
output = output.replace("],\n\t},", "]\n\t}")
f.write(output)
def clean(text):
cleaned = text.replace("\n", " ")
cleaned = cleaned.replace("\t", " ")
cleaned = cleaned.replace(" ", " ")
cleaned = cleaned.replace("إِنَّهُۥ مِن سُلَيۡمَٰنَ وَإِنَّهُۥ بِسۡمِ ٱللَّهِ ٱلرَّحۡمَٰنِ ٱلرَّحِيمِ",
"%")
cleaned = cleaned.replace("بِسۡمِ ٱللَّهِ ٱلرَّحۡمَٰنِ ٱلرَّحِيمِ", "#")
cleaned = cleaned.replace("بِّسۡمِ ٱللَّهِ ٱلرَّحۡمَٰنِ ٱلرَّحِيمِ", "#")
cleaned = cleaned.replace("%",
"إِنَّهُۥ مِن سُلَيۡمَٰنَ وَإِنَّهُۥ بِسۡمِ ٱللَّهِ ٱلرَّحۡمَٰنِ ٱلرَّحِيمِ")
cleaned = cleaned.replace("سُورَةُ التَّوۡبَةِ بَرَآءَةٞ مِّنَ ٱللَّهِ وَرَسُولِهِۦٓ إِلَى ٱلَّذِينَ عَٰهَدتُّم مِّنَ ٱلۡمُشۡرِكِينَ",
"سُورَةُ التَّوۡبَةِ#بَرَآءَةٞ مِّنَ ٱللَّهِ وَرَسُولِهِۦٓ إِلَى ٱلَّذِينَ عَٰهَدتُّم مِّنَ ٱلۡمُشۡرِكِينَ")
cleaned = en_to_ar_num(cleaned)
# save for debug
with open("debug.txt", "w", encoding="utf8") as f:
f.write(cleaned)
#
verses = [x for x in range(300)]
verses = reversed(verses)
for verse in verses:
arN = en_to_ar_num(str(verse))
cleaned = cleaned.replace(arN, ";")
return cleaned
fullText = read("fullQuran.txt")
fullText = clean(fullText)
separated = re.split(";", fullText)
print("[" + separated[len(separated)-1] + "]")
for item in separated:
if item == " ":
separated.remove(item)
write(separated, 'output/result.json')