-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
358 lines (340 loc) · 17.9 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
import re
class PythonToHTML:
def __init__(self, input_py_name, output_html_name):
# 輸入與輸出檔案名稱
self.input_py_name = input_py_name + ".py"
self.output_html_name = output_html_name + ".html"
# 定義所需資料
self.py = str() # 檔案內容
self.colored = list() # 已上色 index 列表,type : list[int]
self.coloring_list = list() # 欲上色列表,type : list[list[int, int, str]],意義為[開始index, 結束index, 著色類別]
# 定義所有關鍵字和初始化搜尋模組與自訂類別、函式清單
# class='module'
self.type_list = ["type", "int", "float", "str", "tuple", "list", "dict", "set", "bool"]
self.module_list = list()
self.class_list = list()
# class='str'
self.quot_list = "\"\'"
# class='func'
self.func_list = ['staticmethod', 'classmethod', 'breakpoint', 'isinstance', 'issubclass', 'memoryview', 'bytearray', 'enumerate', 'frozenset', 'callable', 'property', 'reversed', 'compile', 'complex', 'delattr', 'getattr', 'globals', 'hasattr', 'setattr', 'divmod', 'filter', 'format', 'locals', 'object', 'sorted', 'aiter', 'anext', 'ascii', 'bytes', 'input', 'print', 'range', 'round', 'slice', 'super', 'eval', 'exec', 'hash', 'help', 'iter', 'next', 'open', 'repr', 'vars', 'abs', 'all', 'any', 'bin', 'chr', 'dir', 'hex', 'len', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'sum', 'zip', 'id']
self.def_list = list()
# class='keyword1'
self.keyword_list1 = ["nonlocal", "global", "lambda", "class", "and", "def", "not", "is", "or", "@"]
self.bool_list = ['False', 'True', 'None']
# class='keyword2'
self.keyword_list2 = ["continue", "finally", "assert", "except", "import", "return", "except", "break", "raise", "while", "yield", "while", "elif", "else", "from", "pass", "with", "del", "for", "try", "as", "if", "in"]
# class='op'
self.op_list = "=+-*/%&|^>!~,:"
# class='brackets'
self.brackets_list = "()[]{}"
def main(self): # 主程式
self.read_py() # 讀取 py 檔,並存成 self.py(str)
self.py = self.py.replace("<", "<")
self.find_comment() # 尋找註解(class='comment')
self.find_str() # 尋找字串(class='str')
self.find_type() # 尋找型別(class='module')
self.find_module() # 尋找模組(class='module')
self.find_class() # 尋找類別(class='module')
self.find_func() # 尋找內建函式(class='func')
self.find_def() # 尋找自訂函式(class='func')
self.find_keyword() # 尋找關鍵字(class='keyword1' and 'keyword2')
self.find_bool_and_None() # 尋找布林值和空值(class='keyword1')
self.find_lt() # 尋找 < 的 HTML格式(class='op')
self.find_op() # 尋找運算符號(class='op')
self.find_brackets() # 尋找括號(class='brackets')
self.find_number() # 尋找數字(class='number')
self.add_span() # 加入所有 span 標籤
self.add_html_exception() # 處理 HTML 例外格式
self.to_html() # 轉為 HTML
def read_py(self): # 讀取 py 檔,並存成 self.py(str)
with open(self.input_py_name, "r", encoding="utf-8") as f:
# 頭尾加換行符號是方便做型別判斷處理,最後會刪除
self.py = "\n"
for line in f: self.py += line
self.py += "\n"
def find_comment(self): # 尋找註解(class='comment')
for i in range(len(self.py)):
if self.py[i] == "#":
end = i + self.py[i:].find("\n")
if (self.py[i:end].count("\"") + self.py[i:end].count("\'"))%2 == 0:
data = [i, end, "comment"]
self.add_coloring(data)
def find_str(self): # 尋找字串(class='str')
dqi_list = list() # double quotation index list (記錄雙引號位置)
sqi_list = list() # single quotation index list (記錄單引號位置)
# 紀錄引號位置
for j in range(len(self.py)):
if self.py[j] in "\"\'" and self.py[j-1] != "\\" and j not in self.colored: # 跳過「\'」、「\"」字元
if self.py[j] == "\"": dqi_list.append(j) # 紀錄雙引號位置
else: sqi_list.append(j) # 紀錄單引號位置
# 處理 2 雙引包 1 單引(或反之)
all_list = sorted(sqi_list + dqi_list + self.search_all("\n", self.py))
split_list = list()
start = 0
for i in range(len(all_list)):
if self.py[all_list[i]]=="\n":
split_list.append(all_list[start:i])
start = i + 1
for line in split_list:
if len(line) >= 3:
for j in range(len(line)-2):
if self.py[line[j]] == self.py[line[j+2]] and \
self.py[line[j]] != self.py[line[j+1]]:
if self.py[line[j+1]] == "\"":
dqi_list.remove(line[j+1])
else:
sqi_list.remove(line[j+1])
# 因引號為兩兩一組出現,故把 list 分割為兩兩一組
# (因註解與2包1和「\'」、「\"」已處理完,在程式碼無 SyntaxError 前提下,可直接分兩兩一組)
dqi_list = [[dqi_list[j], dqi_list[j+1], "str"] for j in range(0, len(dqi_list), 2)]
sqi_list = [[sqi_list[j], sqi_list[j+1], "str"] for j in range(0, len(sqi_list), 2)]
# 若某單引號包裹著雙引號,或者反之,則必須把被包裹的引號對刪除,只保留外部的引號對
for d in range(len(dqi_list)):
for s in range(len(sqi_list)):
compare = [dqi_list[d][0] , dqi_list[d][1], sqi_list[s][0], sqi_list[s][1]]
# 若某對引號被包裹,則此區塊會將它們的值改為 [None, None]
# 若無兩對比較引號皆不為 None,且無互相包裹情況,則不做任何處理
if None not in compare:
if compare[0] < compare[2] < compare[3] < compare[1]:
sqi_list[s][0] = sqi_list[s][1] = None
elif compare[2] < compare[0] < compare[1] < compare[3]:
dqi_list[d][0] = dqi_list[d][1] = None
# 將為 [None, None] 的引號對刪除
# (因屬於 for 迴圈,要走訪並刪除就必須反向讀取,避免 IndexError)
for d in range(len(dqi_list)-1, -1, -1):
if dqi_list[d][0] == None:
del dqi_list[d]
for s in range(len(sqi_list)-1, -1, -1):
if sqi_list[s][0] == None:
del sqi_list[s]
qi_list = sorted(dqi_list + sqi_list)
# 處理 f-string 內的{}
for i in range(len(qi_list)-1, -1, -1):
start = qi_list[i][0]
end = qi_list[i][1]
# 跳過 f-string 內的雙大括號
py_copy = self.py[:].replace("{{", "🧡🧡").replace("}}", "🌟🌟")
if py_copy[start-1] == "f" and "{" in py_copy[start:end+1] and "}" in py_copy[start:end+1]:
sign_list = list()
for j in range(len(py_copy[start:end+1])):
if py_copy[start + j] in "\"\'{}":
sign_list.append([start + j, py_copy[start + j]])
for j in range(len(sign_list)-1):
cur_ind = sign_list[j][0]
cur = sign_list[j][1]
next_ind = sign_list[j+1][0]
next = sign_list[j+1][1]
if cur in "\"\'" and next == "{":
if cur_ind + 1 == next_ind: continue
if j != 0: cur_ind += 1
next_ind -= 1
elif cur == "}" and next in "\"\'":
cur_ind += 1
elif cur == "}" and next == "{":
if cur_ind + 1 == next_ind: continue
cur_ind += 1
next_ind -= 1
elif cur in "\"\'" and next in "\"\'":
if j!=0: cur_ind += 1
else:
continue
qi_list.append([cur_ind, next_ind, "str"])
del qi_list[i]
# 加入著色列表
for qi in qi_list:
self.add_coloring(qi)
# 處理 f-string 的 f
f_ind = qi[0] - 1
if self.py[f_ind] == "f":
self.add_coloring([f_ind, f_ind, "keyword1"])
def find_type(self): # 尋找型別(class='module')
sign = "\n ,()[]{}"
self.add_coloring_and_detect_sign(self.type_list, sign, "module")
def find_module(self): # 尋找模組(class='module')
import_list = ["import", "from", "as", "*", ""] # 尋找匯入模組的關鍵字列表
py_copy = "".join(self.py)
py_copy = py_copy.split("\n")
py_copy = [line for line in py_copy if "import" in line]
py_copy = [item for line in py_copy for item in line.split()]
for i in range(len(py_copy)-1, -1, -1):
if "." in py_copy[i]:
py_copy.extend(py_copy[i].split("."))
del py_copy[i]
for f in py_copy:
m = ""
for s in f:
if s.isalpha() or s.isdigit() or s == "_": m += s
self.module_list.append(m)
self.module_list = [m for m in list(set(self.module_list)) if m not in import_list]
sign = " \n.,()" # 模組名稱前後合法字元
self.add_coloring_and_detect_sign(self.module_list, sign, "module")
def find_class(self): # 尋找類別(class='module')
for ind in self.search_all("class", self.py):
start = ind + len("class")
done = False
while True:
if self.is_name(self.py[start], first = True):
end = start
while True:
if not self.is_name(self.py[end], first = False):
done = True
break
end += 1
if done:
break
start += 1
self.class_list.append(self.py[start:end])
sign = " \n=:()[]{}"
self.add_coloring_and_detect_sign(self.class_list, sign, "module")
def find_func(self): # 尋找內建函式(class='func')
sign = " \n+-*/%=[](){}"
self.add_coloring_and_detect_sign(self.func_list, sign, "func")
def find_def(self): # 尋找自訂函式(class='func')
for ind in self.search_all("def", self.py):
start = ind + len("def")
done = False
while True:
if self.is_name(self.py[start], first = True):
end = start
while True:
if not self.is_name(self.py[end], first = False):
done = True
break
end += 1
if done:
break
start += 1
self.def_list.append(self.py[start:end])
sign = " \n+-*/%=.[](){}"
self.add_coloring_and_detect_sign(self.def_list, sign, "func")
def find_keyword(self): # 尋找關鍵字(class='keyword1' and 'keyword2')
sign = " \n:"
self.add_coloring_and_detect_sign(self.keyword_list1, sign, "keyword1")
self.add_coloring_and_detect_sign(self.keyword_list2, sign, "keyword2")
def find_bool_and_None(self): # 尋找布林值和空值(class='keyword1')
sign = " \n:()[]{}="
self.add_coloring_and_detect_sign(self.bool_list, sign, "keyword1")
def find_lt(self): # 尋找 < 的 HTML格式(class='op')
for lt in self.search_all("<", self.py):
self.add_coloring([lt, lt+2, "op"])
def find_op(self): # 尋找運算符號(class='op')
self.add_coloring_at_single_char(self.op_list, "op")
def find_brackets(self): # 尋找括號(class='brackets')
self.add_coloring_at_single_char(self.brackets_list, "brackets")
def find_number(self): # 尋找數字(class='number')
sign = [' ', '\n', ':', '+', '-', '*', '/', '%', '=', ',', '(', ')', '[', ']', '{', '}', '>']
detected = False
for i in range(len(self.py)):
if self.is_number(self.py[i]) and (self.py[i-1] in sign or self.py[i-3:i] == "<"):
start = i
detected = True
if detected:
if self.is_number(self.py[i]) and (self.py[i + 1] in sign or self.py[i+1:i+4] == "<"):
end = i
data = [start, end, "number"]
self.add_coloring(data)
detected = False
def add_span(self): # 加入所有 span 標籤
self.coloring_list.sort(reverse=True)
for loc in self.coloring_list:
start = loc[0]
end = loc[1] + 1
Class = loc[2]
self.py = f"{self.py[:start]}<span class='{Class}'>{self.py[start:end]}</span>{self.py[end:]}"
def add_html_exception(self): # 將程式碼內空格轉換為 HTML 格式
py_list = []
py_copy = self.py[:]
while py_copy!="": # 重複執行到全部切分完
if "<span" not in py_copy: # 若此區塊無 <span (或已切分完)則直接 pop 到 new_file_list
py_list.append(py_copy)
break
start = py_copy.find("<span") # 抓取最前面的 <span
end = py_copy.find("</span>") + 7 # 抓取最前面的 </span>
py_list.append(py_copy[:start]) # 將 <span 前的區塊切分進 new_file_list
py_list.append(py_copy[start:end]) # 將 <span></span> 切分進 new_file_list
py_copy = py_copy[end:] # 將以上處理完的區塊刪除,往下輪 while 處理
for i in range(len(py_list)-1, -1, -1):
# # 刪除切分多餘的空元素
# if py_list[i] == "":
# del py_list[i]
# 將不在 span 標籤內的空字元轉為 HTML 格式(否則連續空格會被縮成一個空格)
if not py_list[i].startswith("<span"):
py_list[i] = py_list[i].replace(" ", " ")
# 將在字串 span 標籤內的 < 轉為 HTML 格式(否則會被誤認為標籤符號)
if py_list[i].startswith("<span class='str'>"):
if "<" in py_list[i][19:-8]: # WHY?
py_list[i] = py_list[i][:19] + py_list[i][19:-8].replace("<", "<") + py_list[i][-8:]
self.py = "".join(py_list)
def to_html(self): # 轉為 HTML
self.py = self.py.split("\n")
self.py = self.py[1:-1]
HTML = """
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body {font-family: consolas;background-color: #37464a;color:black}
.code {border-collapse: collapse;color: #9cdcfe;background-color:#1a1b1c;width: 100%}
.table_num {text-align: right;width: 40px;color: grey}
.keyword1 {color: #569cd6}
.keyword2 {color: #c586c0}
.op {color: #d4d4d4}
.func {color: #dcdcaa}
.str {color: #ce9178}
.comment {color:#6a9955;font-style:italic}
.brackets {color:#ffd700}
.module {color:#4ec9b0}
.number {color:#b5cea8}
</style>
</head>
<body>
<table class='code'>
"""
for i in range(len(self.py)):
HTML += "<tr>"
HTML += f"<td class='table_num'>{i+1}</td>"
HTML += f"<td width='10px'></td>"
HTML += f"<td>{self.py[i]}</td>"
HTML += "</tr>"
HTML += """
</table>
</body>
</html>
"""
with open(self.output_html_name, "w", encoding="utf-8") as f:
f.write(HTML)
def search_all(self, target, s): # 搜尋 py 檔內所有指定字串的 index
return [_.start() for _ in re.finditer(target, s)]
def add_coloring(self, data): # 新增著色區塊
# 參數 data 格式 [頭索引 int, 尾索引 int, 著色名稱 str]
start = data[0]
end = data[1]
# 若該區塊已被占用則不加入著色
if (start in self.colored or end in self.colored):
return
# 若未被占用則加入到 self.coloring_list,並在 self.colored 內標註已占用
else:
self.coloring_list.append(data)
self.colored.extend([i for i in range(start, end + 1)])
def add_coloring_and_detect_sign(self, target_list, sign, class_name): # 新增著色區塊同時偵測左右字元
for t in target_list:
for start in self.search_all(t, self.py):
end = start + len(t) - 1
if self.py[start - 1] in sign and self.py[end + 1] in sign:
data = [start, end, class_name]
self.add_coloring(data)
def add_coloring_at_single_char(self, target_list, class_name): # 新增單字元著色區塊
for i in range(len(self.py)):
if self.py[i] in target_list:
data = [i, i, class_name]
self.add_coloring(data)
def is_name(self, s, first): # 確認是否為合法命名
if first:
return s.isalpha() or s == "_"
else:
return s.isdigit() or s.isalpha() or s == "_"
def is_number(self, s): # 確認是否為數字(int float 皆可判斷)
return s.isdigit() or s == "."
pth = PythonToHTML(input_py_name = "", output_html_name = "")
pth.main()