// Package lexer provides a Lexer that works along the lines of the lexer
// design Rob Pike presents in this talk:
// https://www.youtube.com/watch?v=HxaD_trXwRE
//
// You can define your token types by using the lexer.TokenType type (an int)
// via
//
//	const (
//		StringToken lexer.TokenType = iota
//		IntegerToken
//		// etc...
//	)
//
// And then you define your own state functions (lexer.StateFunc) to handle
// analyzing the string.
//
//	func StringState(l *lexer.L) lexer.StateFunc {
//		l.Next()   // eat starting "
//		l.Ignore() // drop current value
//		for l.Peek() != '"' {
//			l.Next()
//		}
//		l.Emit(StringToken)
//
//		return SomeStateFunction
//	}
//
// This Lexer is meant to emit tokens in such a fashion that they can be
// consumed by goyacc.
package lexer

import (
	"errors"
	"strings"
	"unicode/utf8"
)

// StateFunc represents a single lexer state: it consumes input from the
// lexer and returns the next state, or nil to stop lexing.
type StateFunc func(*L) StateFunc

// TokenType identifies the kind of a Token.
type TokenType int

const (
	EOFRune    rune      = -1
	EmptyToken TokenType = 0
)

// Token is a typed value emitted by the lexer.
type Token struct {
	Type  TokenType
	Value string
}

// L holds the lexer state: the source being analyzed, the bounds of the
// token currently under construction, and the channel on which tokens are
// emitted.
type L struct {
	source          string
	start, position int
	startState      StateFunc
	Err             error
	tokens          chan Token
	ErrorHandler    func(e string)
	rewind          runeStack
}

// New creates and returns a lexer ready to parse the given source code.
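//
// A minimal usage sketch (NumberState is an illustrative starting state, not
// part of this package; a full sketch of it appears at the end of this file):
//
//	l := lexer.New("1 2 3", NumberState)
//	l.Start()
//	for {
//		tok, done := l.NextToken()
//		if done {
//			break
//		}
//		fmt.Println(tok.Type, tok.Value)
//	}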
func New(src string, start StateFunc) *L {
return &L{
source: src,
startState: start,
start: 0,
position: 0,
rewind: newRuneStack(),
}
}

// Start begins executing the Lexer in an asynchronous manner (using a goroutine).
func (l *L) Start() {
// Take half the string length as a buffer size.
buffSize := len(l.source) / 2
if buffSize <= 0 {
buffSize = 1
}
l.tokens = make(chan Token, buffSize)
go l.run()
}

// StartSync begins executing the Lexer in a synchronous manner: the state
// machine runs in the calling goroutine and this method returns once lexing
// completes.
func (l *L) StartSync() {
// Take half the string length as a buffer size.
buffSize := len(l.source) / 2
if buffSize <= 0 {
buffSize = 1
}
l.tokens = make(chan Token, buffSize)
l.run()
}

// Current returns the value being analyzed at this moment.
func (l *L) Current() string {
return l.source[l.start:l.position]
}

// Emit will receive a token type and push a new token with the currently
// analyzed value into the tokens channel.
func (l *L) Emit(t TokenType) {
tok := Token{
Type: t,
Value: l.Current(),
}
l.tokens <- tok
l.start = l.position
l.rewind.clear()
}

// Ignore clears the rewind stack and then sets the current beginning position
// to the current position in the source, which effectively ignores the section
// of the source being analyzed.
func (l *L) Ignore() {
l.rewind.clear()
l.start = l.position
}

// Peek performs a Next operation immediately followed by a Rewind, returning
// the peeked rune.
func (l *L) Peek() rune {
r := l.Next()
l.Rewind()
return r
}

// Rewind will take the last rune read (if any) and rewind back, moving the
// position to before that rune. Rewinds can occur more than once per call to
// Next, but you can never rewind past the last point a token was emitted.
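//
// For example, with source "ab" (an illustrative sketch):
//
//	l.Next()   // reads 'a'
//	l.Next()   // reads 'b'
//	l.Rewind() // un-reads 'b'
//	l.Rewind() // un-reads 'a'; the position is back at the token start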
func (l *L) Rewind() {
r := l.rewind.pop()
if r > EOFRune {
size := utf8.RuneLen(r)
l.position -= size
if l.position < l.start {
l.position = l.start
}
}
}

// Next pulls the next rune from the Lexer and returns it, moving the position
// forward in the source.
func (l *L) Next() rune {
var (
r rune
s int
)
str := l.source[l.position:]
if len(str) == 0 {
r, s = EOFRune, 0
} else {
r, s = utf8.DecodeRuneInString(str)
}
l.position += s
l.rewind.push(r)
return r
}

// Take receives a string containing all acceptable runes and will continue
// over each consecutive rune in the source until a rune not in the given
// string is encountered. This should be used to quickly pull token parts.
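//
// For example, to pull a run of digits (IntegerToken is an assumed token
// type, not defined by this package):
//
//	l.Take("0123456789")
//	l.Emit(IntegerToken)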
func (l *L) Take(chars string) {
r := l.Next()
for strings.ContainsRune(chars, r) {
r = l.Next()
}
l.Rewind() // last next wasn't a match
}

// NextToken returns the next token from the lexer and a boolean that reports
// whether the lexer has finished; once it returns true, the token stream is
// exhausted and the returned token is nil.
func (l *L) NextToken() (*Token, bool) {
	tok, ok := <-l.tokens
	if !ok {
		return nil, true
	}
	return &tok, false
}

// Error implements the error-reporting half of goyacc's yyLexer interface:
// if an ErrorHandler is set, it records the error and delegates to the
// handler; otherwise it panics.
func (l *L) Error(e string) {
if l.ErrorHandler != nil {
l.Err = errors.New(e)
l.ErrorHandler(e)
} else {
panic(e)
}
}
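
// A hypothetical sketch of the remaining Lex half of the yyLexer interface,
// assuming a goyacc-generated parser whose yySymType carries a string field
// named s (the wrapper and field names are illustrative, not part of this
// package):
//
//	type yyLexWrapper struct{ *lexer.L }
//
//	func (w yyLexWrapper) Lex(lval *yySymType) int {
//		tok, done := w.NextToken()
//		if done {
//			return 0 // goyacc parsers treat 0 as end of input
//		}
//		lval.s = tok.Value
//		return int(tok.Type) // assumes token types align with %token values
//	}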

// Private methods

// run drives the lexer: it calls the current state function repeatedly until
// one returns nil, then closes the tokens channel.
func (l *L) run() {
state := l.startState
for state != nil {
state = state(l)
}
close(l.tokens)
}
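
// An end-to-end sketch of a state function built from the primitives above,
// assuming input of digit runs separated by single non-digit runes
// (NumberToken and NumberState are illustrative, not part of this package):
//
//	const NumberToken lexer.TokenType = 1
//
//	func NumberState(l *lexer.L) lexer.StateFunc {
//		l.Take("0123456789")
//		l.Emit(NumberToken)
//		if l.Peek() == lexer.EOFRune {
//			return nil // a nil state stops the lexer
//		}
//		l.Next()   // consume the separator rune
//		l.Ignore() // and drop it
//		return NumberState
//	}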