Skip to content

Commit 05298b4

Browse files
committed
Refactor ident to reuse in IDEMode, fixes out of sync bug.
Related: 1a4f424 on 2019-September-28
1 parent b665e6a commit 05298b4

File tree

6 files changed

+49
-60
lines changed

6 files changed

+49
-60
lines changed

src/Core/Name.idr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ showSep sep (x :: xs) = x ++ sep ++ showSep sep xs
5656
||| Check whether a given character is a valid identifier character
5757
export
5858
identChar : Char -> Bool
59-
identChar x = isAlphaNum x || x == '_' || x == '\''
59+
identChar x = isAlphaNum x || x == '_' || x == '\'' || x > chr 127
6060

6161
export Show Name where
6262
show (NS ns n) = showSep "." (reverse ns) ++ "." ++ show n

src/Idris/Desugar.idr

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,10 @@ toTokList (POp fc opn l r)
7272
let op = nameRoot opn
7373
case lookup op (infixes syn) of
7474
Nothing =>
75-
let ops = unpack opChars in
76-
if any (\x => x `elem` ops) (unpack op)
77-
then throw (GenericMsg fc $ "Unknown operator '" ++ op ++ "'")
78-
else do rtoks <- toTokList r
79-
pure (Expr l :: Op fc opn backtickPrec :: rtoks)
75+
if any isOpChar (unpack op)
76+
then throw (GenericMsg fc $ "Unknown operator '" ++ op ++ "'")
77+
else do rtoks <- toTokList r
78+
pure (Expr l :: Op fc opn backtickPrec :: rtoks)
8079
Just (Prefix, _) =>
8180
throw (GenericMsg fc $ "'" ++ op ++ "' is a prefix operator")
8281
Just (fix, prec) =>

src/Idris/IDEMode/MakeClause.idr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import Parser.Unlit
99
showRHSName : Name -> String
1010
showRHSName n
1111
= let fn = show (dropNS n) in
12-
if any (flip elem (unpack opChars)) (unpack fn)
12+
if any isOpChar (unpack fn)
1313
then "op"
1414
else fn
1515

src/Idris/IDEMode/Parser.idr

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
||| Slightly different lexer than the source language because we are more free
2+
||| as to what can be identifiers, and fewer tokens are supported. But otherwise,
3+
||| we can reuse the standard stuff
14
module Idris.IDEMode.Parser
25

36
import Idris.IDEMode.Commands
@@ -7,34 +10,17 @@ import Parser.Lexer
710
import Parser.Support
811
import Text.Lexer
912

10-
-- Slightly different lexer than the source language because we are more free
11-
-- as to what can be identifiers, and fewer tokens are supported. But otherwise,
12-
-- we can reuse the standard stuff
13-
1413
%hide Lexer.symbols
1514

1615
symbols : List String
1716
symbols = ["(", ":", ")"]
1817

19-
ident : Lexer
20-
ident = pred startIdent <+> many (pred validIdent)
21-
where
22-
startIdent : Char -> Bool
23-
startIdent '_' = True
24-
startIdent x = isAlpha x
25-
26-
validIdent : Char -> Bool
27-
validIdent '_' = True
28-
validIdent '-' = True
29-
validIdent '\'' = True
30-
validIdent x = isAlphaNum x
31-
3218
ideTokens : TokenMap Token
3319
ideTokens =
3420
map (\x => (exact x, Symbol)) symbols ++
3521
[(digits, \x => Literal (cast x)),
3622
(stringLit, \x => StrLit (stripQuotes x)),
37-
(ident, Ident),
23+
(identRelaxed, Ident),
3824
(space, Comment)]
3925
where
4026
stripQuotes : String -> String

src/Idris/IDEMode/TokenLine.idr

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1+
||| Tokenise a source line for easier processing
12
module Idris.IDEMode.TokenLine
23

3-
-- Tokenise a source line for easier processing
4-
4+
import Parser.Lexer
55
import Text.Lexer
66

77
public export
@@ -14,24 +14,12 @@ data SourcePart
1414
| Equal
1515
| Other String
1616

17-
ident : Lexer
18-
ident = pred startIdent <+> many (pred validIdent)
19-
where
20-
startIdent : Char -> Bool
21-
startIdent '_' = True
22-
startIdent x = isAlpha x
23-
24-
validIdent : Char -> Bool
25-
validIdent '_' = True
26-
validIdent '\'' = True
27-
validIdent x = isAlphaNum x
28-
2917
holeIdent : Lexer
30-
holeIdent = is '?' <+> ident
18+
holeIdent = is '?' <+> identRelaxed
3119

3220
srcTokens : TokenMap SourcePart
3321
srcTokens =
34-
[(ident, Name),
22+
[(identRelaxed, Name),
3523
(holeIdent, \x => HoleName (assert_total (strTail x))),
3624
(space, Whitespace),
3725
(is '{', const LBrace),
@@ -47,4 +35,3 @@ tokens str
4735
-- number to read when storing spans in the file
4836
(srctoks, (l, c, rest)) =>
4937
map tok srctoks ++ (if rest == "" then [] else [Other rest])
50-

src/Parser/Lexer.idr

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -61,20 +61,39 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
6161
docComment : Lexer
6262
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')
6363

64-
ident : Lexer
65-
ident = pred startIdent <+> many (pred validIdent)
66-
where
67-
startIdent : Char -> Bool
68-
startIdent '_' = True
69-
startIdent x = isAlpha x || x > chr 127
64+
-- Identifier Lexer
65+
--
66+
-- There are two variants, a strict ident and a relaxed ident.
67+
-- Prime definitions recieve a boolean determining if it is relaxed.
68+
69+
startIdent : Char -> Bool
70+
startIdent '_' = True
71+
startIdent x = isAlpha x || x > chr 127
72+
73+
%inline
74+
validIdent' : Bool -> Char -> Bool
75+
validIdent' _ '_' = True
76+
validIdent' r '-' = r
77+
validIdent' _ '\'' = True
78+
validIdent' _ x = isAlphaNum x || x > chr 127
79+
80+
%inline
81+
ident' : Bool -> Lexer
82+
ident' relaxed =
83+
(pred $ startIdent) <+>
84+
(many . pred $ validIdent' relaxed)
85+
86+
-- This are the two identifier lexer specializations
7087

71-
validIdent : Char -> Bool
72-
validIdent '_' = True
73-
validIdent '\'' = True
74-
validIdent x = isAlphaNum x || x > chr 127
88+
identStrict : Lexer
89+
identStrict = ident' False
90+
91+
export
92+
identRelaxed : Lexer
93+
identRelaxed = ident' True
7594

7695
holeIdent : Lexer
77-
holeIdent = is '?' <+> ident
96+
holeIdent = is '?' <+> identStrict
7897

7998
doubleLit : Lexer
8099
doubleLit
@@ -121,12 +140,13 @@ symbols
121140
"(", ")", "{", "}", "[", "]", ",", ";", "_",
122141
"`(", "`"]
123142

143+
124144
export
125-
opChars : String
126-
opChars = ":!#$%&*+./<=>?@\\^|-~"
145+
isOpChar : Char -> Bool
146+
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
127147

128148
validSymbol : Lexer
129-
validSymbol = some (oneOf opChars)
149+
validSymbol = some (pred isOpChar)
130150

131151
-- Valid symbols which have a special meaning so can't be operators
132152
export
@@ -136,9 +156,6 @@ reservedSymbols
136156
["%", "\\", ":", "=", "|", "|||", "<-", "->", "=>", "?", "!",
137157
"&", "**", ".."]
138158

139-
symbolChar : Char -> Bool
140-
symbolChar c = c `elem` unpack opChars
141-
142159
fromHexLit : String -> Integer
143160
fromHexLit str
144161
= if length str <= 2
@@ -161,7 +178,7 @@ rawTokens =
161178
(digits, \x => Literal (cast x)),
162179
(stringLit, \x => StrLit (stripQuotes x)),
163180
(charLit, \x => CharLit (stripQuotes x)),
164-
(ident, \x => if x `elem` keywords then Keyword x else Ident x),
181+
(identStrict, \x => if x `elem` keywords then Keyword x else Ident x),
165182
(space, Comment),
166183
(validSymbol, Symbol),
167184
(symbol, Unrecognised)]

0 commit comments

Comments
 (0)