-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.py
73 lines (64 loc) · 1.83 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
__author__ = 'Dave and Alex'
import re
import sys
# Token tags attached to each lexeme by the rule table below.
RESERVED = 'RESERVED'
INT = 'INT'
VAR = 'VAR'
FLOAT = 'FLOAT'

# Ordered list of (regex, tag) rules. Rules are tried top to bottom and the
# first one that matches at the current position wins, so order matters:
# longer operators come before their prefixes ('<=' before '<') and keywords
# come before the generic identifier rule. A tag of None means the matched
# text is consumed but no token is produced.
#
# Alphabetic keywords end with \b so that they only match whole words;
# without it an identifier such as 'total' or 'format' would be split into
# the keyword 'to'/'for' followed by a stray identifier.
lex = [
    (r'[ \n\t]+', None),  # whitespace: consumed, no token
    (r'#[^\n]*', None),  # comment up to end of line: consumed, no token
    (r'\=', RESERVED),
    (r'\(', RESERVED),
    (r'\)', RESERVED),
    (r'\+', RESERVED),
    (r'-', RESERVED),
    (r'\*', RESERVED),
    (r'/', RESERVED),
    (r'\%', RESERVED),
    (r'\^', RESERVED),
    (r'<=', RESERVED),
    (r'<', RESERVED),
    (r'>=', RESERVED),
    (r'>', RESERVED),
    (r'equal\b', RESERVED),
    (r'not equal\b', RESERVED),
    (r'if\b', RESERVED),
    (r'else:', RESERVED),
    (r':', RESERVED),
    (r'end\b', RESERVED),
    (r';', RESERVED),
    (r'while\b', RESERVED),
    (r'function\b', RESERVED),
    (r'for\b', RESERVED),
    (r'to\b', RESERVED),
    (r'call\b', RESERVED),
    (r'show\b', RESERVED),
    # Numeric literal with optional fraction and exponent. The leading
    # [+-]? can never fire because '+' and '-' are matched as RESERVED above.
    (r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?', FLOAT),
    # NOTE(review): unreachable as written - the FLOAT rule above also
    # matches plain digit runs, so integers are tagged FLOAT. Left in place
    # because changing the tag could affect consumers of the token stream.
    (r'[0-9]+', INT),
    (r'[A-Za-z][A-Za-z0-9_]*', VAR),  # identifiers / variable names
]
def lexer(chars, sentence):
    """Split *chars* into a list of ``(text, tag)`` tokens.

    *sentence* is an iterable of ``(pattern, tag)`` rules. At each position
    the rules are tried in order and the first one that consumes at least
    one character wins. If its tag is truthy, ``(matched_text, tag)`` is
    appended to the result; otherwise the text is skipped (used for
    whitespace and comments).

    If no rule matches at the current position, an ``Illegal character``
    message is written to stderr and the process exits with status 1 via
    ``sys.exit``.
    """
    # Compile every rule once up front rather than once per rule per token.
    rules = [(re.compile(pattern), tag) for pattern, tag in sentence]

    tokens = []
    pos = 0  # index of the next unread character in chars
    while pos < len(chars):
        for regex, tag in rules:
            match = regex.match(chars, pos)
            # A zero-width match would leave pos unchanged and loop forever,
            # so it is treated the same as no match at all.
            if match and match.end(0) > pos:
                break
        else:
            # No rule consumed anything at this position.
            sys.stderr.write('Illegal character: %s\n' % chars[pos])
            sys.exit(1)

        if tag:
            tokens.append((match.group(0), tag))
        pos = match.end(0)
    return tokens
def do_lex(characters):
    """Tokenize *characters* with the module-level ``lex`` rule table."""
    tokens = lexer(characters, lex)
    return tokens