@@ -61,20 +61,39 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
6161docComment : Lexer
6262docComment = is ' |' <+> is ' |' <+> is ' |' <+> many (isNot ' \n ' )
6363
64- ident : Lexer
65- ident = pred startIdent <+> many (pred validIdent)
66- where
67- startIdent : Char -> Bool
68- startIdent ' _' = True
69- startIdent x = isAlpha x || x > chr 127
64+ -- Identifier Lexer
65+ --
66+ -- There are two variants, a strict ident and a relaxed ident.
67+ -- Prime definitions recieve a boolean determining if it is relaxed.
68+
69+ startIdent : Char -> Bool
70+ startIdent ' _' = True
71+ startIdent x = isAlpha x || x > chr 127
72+
73+ %inline
74+ validIdent' : Bool -> Char -> Bool
75+ validIdent' _ ' _' = True
76+ validIdent' r ' -' = r
77+ validIdent' _ ' \' ' = True
78+ validIdent' _ x = isAlphaNum x || x > chr 127
79+
80+ %inline
81+ ident' : Bool -> Lexer
82+ ident' relaxed =
83+ (pred $ startIdent) <+>
84+ (many . pred $ validIdent' relaxed)
85+
86+ -- This are the two identifier lexer specializations
7087
71- validIdent : Char -> Bool
72- validIdent ' _' = True
73- validIdent ' \' ' = True
74- validIdent x = isAlphaNum x || x > chr 127
88+ identStrict : Lexer
89+ identStrict = ident' False
90+
91+ export
92+ identRelaxed : Lexer
93+ identRelaxed = ident' True
7594
7695holeIdent : Lexer
77- holeIdent = is ' ?' <+> ident
96+ holeIdent = is ' ?' <+> identStrict
7897
7998doubleLit : Lexer
8099doubleLit
@@ -121,12 +140,13 @@ symbols
121140 " (" , " )" , " {" , " }" , " [" , " ]" , " ," , " ;" , " _" ,
122141 " `(" , " `" ]
123142
143+
124144export
125- opChars : String
126- opChars = " :!#$%&*+./<=>?@\\ ^|-~"
145+ isOpChar : Char -> Bool
146+ isOpChar c = c `elem` (unpack " :!#$%&*+./<=>?@\\ ^|-~" )
127147
128148validSymbol : Lexer
129- validSymbol = some (oneOf opChars )
149+ validSymbol = some (pred isOpChar )
130150
131151-- Valid symbols which have a special meaning so can't be operators
132152export
@@ -136,9 +156,6 @@ reservedSymbols
136156 [" %" , " \\ " , " :" , " =" , " |" , " |||" , " <-" , " ->" , " =>" , " ?" , " !" ,
137157 " &" , " **" , " .." ]
138158
139- symbolChar : Char -> Bool
140- symbolChar c = c `elem` unpack opChars
141-
142159fromHexLit : String -> Integer
143160fromHexLit str
144161 = if length str <= 2
@@ -161,7 +178,7 @@ rawTokens =
161178 (digits, \ x => Literal (cast x)),
162179 (stringLit, \ x => StrLit (stripQuotes x)),
163180 (charLit, \ x => CharLit (stripQuotes x)),
164- (ident , \ x => if x `elem` keywords then Keyword x else Ident x),
181+ (identStrict , \ x => if x `elem` keywords then Keyword x else Ident x),
165182 (space, Comment ),
166183 (validSymbol, Symbol ),
167184 (symbol, Unrecognised )]
0 commit comments