-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSC.py
123 lines (111 loc) · 4.08 KB
/
SC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
Pascal0 Scanner
Emil Sekerinski, March 2016
James Priebe, March 2016 - May 2017
"""
# Symbol codes produced by the scanner, one distinct integer per symbol kind.

# operators
TIMES = 1
DIV = 2
MOD = 3
AND = 4
PLUS = 5
MINUS = 6
OR = 7

# relations
EQ = 8
NE = 9
LT = 10
GT = 11
LE = 12
GE = 13

# punctuation and brackets
PERIOD = 14
COMMA = 15
COLON = 16
RPAREN = 17
RBRAK = 18
OF = 19
THEN = 20
DO = 21
LPAREN = 22
LBRAK = 23
NOT = 24
BECOMES = 25

# literals, names and statement/declaration keywords
NUMBER = 26
IDENT = 27
SEMICOLON = 28
END = 29
ELSE = 30
IF = 31
WHILE = 32
ARRAY = 33
RECORD = 34
CONST = 35
TYPE = 36
VAR = 37
PROCEDURE = 38
BEGIN = 39
PROGRAM = 40
EOF = 41

# (symbol code, spelling) pairs for every reserved word
keywords = (
    (DO, 'do'),
    (IF, 'if'),
    (OF, 'of'),
    (OR, 'or'),
    (AND, 'and'),
    (NOT, 'not'),
    (END, 'end'),
    (MOD, 'mod'),
    (VAR, 'var'),
    (ELSE, 'else'),
    (THEN, 'then'),
    (TYPE, 'type'),
    (ARRAY, 'array'),
    (BEGIN, 'begin'),
    (CONST, 'const'),
    (WHILE, 'while'),
    (RECORD, 'record'),
    (PROCEDURE, 'procedure'),
    (DIV, 'div'),
    (PROGRAM, 'program'),
)
# (line, pos) is the location of the current symbol in source
# (lastline, lastpos) is used to more accurately report errors
# (errline, errpos) is used to suppress multiple errors at the same location
# ch is the current character and sym the current symbol
# if sym is NUMBER, val is the number, if sym is IDENT, val is the identifier
# source is the string with the source program
def getErrors():
    """Return the module-level `errors` list if it is non-empty, else False."""
    # An empty list is falsy, so `or` yields False exactly when nothing
    # has been recorded.
    return errors or False
def init(src, suppress):
    """Reset all scanner state for `src` and read the first symbol.

    src      -- string holding the source program
    suppress -- stored in `suppress_errors`; when true, `mark` records
                errors without printing them
    """
    global line, lastline, errline, pos, lastpos, errpos, errors
    global sym, val, error, source, index
    global suppress_errors

    # error bookkeeping
    errors = []
    suppress_errors = suppress
    error = False

    # position bookkeeping: lines start at 1, columns at 0
    line = lastline = errline = 1
    pos = lastpos = errpos = 0

    # input and current symbol
    source = src
    index = 0
    sym = None
    val = None

    # prime the scanner: first character, then first symbol
    getChar()
    getSym()
def getChar():
    """Read the next character of `source` into `ch` and update positions.

    At end of input `ch` is set to chr(0) and no other state changes.
    Otherwise `index` advances by one, `lastpos` takes the previous `pos`,
    and either a new line is started (on a newline character) or `pos` is
    advanced and `lastline` is synchronised with `line`.
    """
    global line, lastline, pos, lastpos, ch, source, index
    if index == len(source):
        ch = chr(0)
        return

    ch = source[index]
    index += 1
    lastpos = pos
    if ch != '\n':
        lastline = line
        pos += 1
    else:
        line += 1
        pos = 0
def number():
    """Scan a run of decimal digits into `val` and set `sym` to NUMBER.

    If the value is 2**31 or larger, an error with code 200 is reported
    through `mark` and `val` is reset to 0.
    """
    global sym, val
    sym = NUMBER
    total = 0
    while '0' <= ch <= '9':
        total = total * 10 + int(ch)
        getChar()
    val = total
    if val >= 2**31:
        mark('number too large', 200)
        val = 0
def ident():
    """Scan an identifier or keyword starting at the current character.

    Consumes the maximal run of ASCII letters and digits. If the spelling
    matches an entry of `keywords`, `sym` is set to that keyword's symbol
    code and `val` is left untouched; otherwise `sym` becomes IDENT and
    `val` the identifier text.

    The spelling is built from the characters actually consumed instead of
    being sliced out of `source` with `index`. `getChar` does not advance
    `index` once the end of input is reached, so the slice
    `source[start:index-1]` dropped the last character of an identifier
    or keyword that ends exactly at the end of the source.
    """
    global sym, val
    chars = []
    while ('A' <= ch <= 'Z') or ('a' <= ch <= 'z') or ('0' <= ch <= '9'):
        chars.append(ch)
        getChar()
    word = ''.join(chars)
    for kw, s in keywords:
        if word == s:
            sym = kw
            return
    sym, val = IDENT, word
def comment():
    """Skip a `{ ... }` comment.

    Characters are consumed up to the closing '}' and that brace is then
    consumed as well. If the input ends first, error 201 is reported
    through `mark`.
    """
    while ch != '}' and ch != chr(0):
        getChar()
    if ch != chr(0):
        # step over the closing brace
        getChar()
    else:
        mark('comment not terminated', 201)
def mark(msg, errcode=-1):
    """Report an error at the position (lastline, lastpos).

    An error is only reported when its position lies beyond the last
    reported one (errline, errpos); this suppresses repeated errors at the
    same location. A reported error appends `errcode` to `errors` and,
    unless `suppress_errors` is set, prints `msg` with the position.
    In every case the error position is updated and `error` is set.
    """
    global errline, errpos, error, errors
    global suppress_errors
    is_new_location = lastline > errline or lastpos > errpos
    if is_new_location:
        if not suppress_errors:
            print('error: line', lastline, 'pos', lastpos, msg)
        errors.append(errcode)
    errline = lastline
    errpos = lastpos
    error = True
def getSym():
    """Scan the next symbol from the input and store its code in `sym`.

    Blanks and control characters are skipped first. Numbers and
    identifiers/keywords are delegated to `number` and `ident` (which also
    set `val`); a '{' starts a comment, which is skipped before scanning
    continues. An unrecognized character is consumed, reported as error
    202 through `mark`, and leaves `sym` as None.
    """
    global sym

    # skip everything from chr(1) up to and including the space character
    while chr(0) < ch <= ' ':
        getChar()

    # symbols that consist of exactly one character
    single_char = {
        '*': TIMES, '+': PLUS, '-': MINUS, '=': EQ,
        ';': SEMICOLON, ',': COMMA, '.': PERIOD,
        '(': LPAREN, ')': RPAREN, '[': LBRAK, ']': RBRAK,
    }

    if ch == chr(0):
        sym = EOF
    elif ch in single_char:
        # look the code up before getChar replaces ch
        code = single_char[ch]
        getChar()
        sym = code
    elif ch == '<':
        # '<', '<=' or '<>'
        getChar()
        if ch == '=':
            getChar()
            sym = LE
        elif ch == '>':
            getChar()
            sym = NE
        else:
            sym = LT
    elif ch == '>':
        # '>' or '>='
        getChar()
        if ch == '=':
            getChar()
            sym = GE
        else:
            sym = GT
    elif ch == ':':
        # ':' or ':='
        getChar()
        if ch == '=':
            getChar()
            sym = BECOMES
        else:
            sym = COLON
    elif '0' <= ch <= '9':
        number()
    elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
        ident()
    elif ch == '{':
        comment()
        getSym()
    else:
        getChar()
        mark('unrecognized character', 202)
        sym = None