Skip to content

Commit e4245d4

Browse files
committed
Add unicode operator support for Mathematical Operators and Arrows
1 parent 7edef68 commit e4245d4

File tree

6 files changed

+78
-37
lines changed

6 files changed

+78
-37
lines changed

src/Idris/Parser.idr

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ export
3939
plhs : ParseOpts
4040
plhs = MkParseOpts False False
4141

42+
leftArrow : Rule ()
43+
leftArrow = symbol "<-" <|> symbol ""
44+
45+
rightArrow : Rule ()
46+
rightArrow = symbol "->" <|> symbol ""
47+
48+
doubleRightArrow : Rule ()
49+
doubleRightArrow = symbol "=>" <|> symbol ""
50+
4251
atom : FileName -> Rule PTerm
4352
atom fname
4453
= do start <- location
@@ -436,12 +445,11 @@ mutual
436445

437446
bindSymbol : Rule (PiInfo PTerm)
438447
bindSymbol
439-
= do symbol "->"
448+
= do rightArrow
440449
pure Explicit
441-
<|> do symbol "=>"
450+
<|> do doubleRightArrow
442451
pure AutoImplicit
443452

444-
445453
explicitPi : FileName -> IndentInfo -> Rule PTerm
446454
explicitPi fname indents
447455
= do start <- location
@@ -461,7 +469,7 @@ mutual
461469
commit
462470
binders <- pibindList fname start indents
463471
symbol "}"
464-
symbol "->"
472+
rightArrow
465473
scope <- typeExpr pdef fname indents
466474
end <- location
467475
pure (pibindAll (MkFC fname start end) AutoImplicit binders scope)
@@ -475,7 +483,7 @@ mutual
475483
t <- simpleExpr fname indents
476484
binders <- pibindList fname start indents
477485
symbol "}"
478-
symbol "->"
486+
rightArrow
479487
scope <- typeExpr pdef fname indents
480488
end <- location
481489
pure (pibindAll (MkFC fname start end) (DefImplicit t) binders scope)
@@ -501,7 +509,7 @@ mutual
501509
symbol "{"
502510
binders <- pibindList fname start indents
503511
symbol "}"
504-
symbol "->"
512+
rightArrow
505513
scope <- typeExpr pdef fname indents
506514
end <- location
507515
pure (pibindAll (MkFC fname start end) Implicit binders scope)
@@ -511,7 +519,7 @@ mutual
511519
= do start <- location
512520
symbol "\\"
513521
binders <- bindList fname start indents
514-
symbol "=>"
522+
doubleRightArrow
515523
mustContinue indents Nothing
516524
scope <- expr pdef fname indents
517525
end <- location
@@ -614,7 +622,7 @@ mutual
614622

615623
caseRHS : FileName -> FilePos -> IndentInfo -> PTerm -> Rule PClause
616624
caseRHS fname start indents lhs
617-
= do symbol "=>"
625+
= do doubleRightArrow
618626
mustContinue indents Nothing
619627
rhs <- expr pdef fname indents
620628
atEnd indents
@@ -703,7 +711,7 @@ mutual
703711
-- If the name doesn't begin with a lower case letter, we should
704712
-- treat this as a pattern, so fail
705713
validPatternVar n
706-
symbol "<-"
714+
leftArrow
707715
val <- expr pdef fname indents
708716
atEnd indents
709717
end <- location
@@ -729,7 +737,7 @@ mutual
729737
(do atEnd indents
730738
end <- location
731739
pure [DoExp (MkFC fname start end) e])
732-
<|> (do symbol "<-"
740+
<|> (do leftArrow
733741
val <- expr pnowith fname indents
734742
alts <- block (patAlt fname)
735743
atEnd indents
@@ -1196,15 +1204,15 @@ getRight (Right v) = Just v
11961204
constraints : FileName -> IndentInfo -> EmptyRule (List (Maybe Name, PTerm))
11971205
constraints fname indents
11981206
= do tm <- appExpr pdef fname indents
1199-
symbol "=>"
1207+
doubleRightArrow
12001208
more <- constraints fname indents
12011209
pure ((Nothing, tm) :: more)
12021210
<|> do symbol "("
12031211
n <- name
12041212
symbol ":"
12051213
tm <- expr pdef fname indents
12061214
symbol ")"
1207-
symbol "=>"
1215+
doubleRightArrow
12081216
more <- constraints fname indents
12091217
pure ((Just n, tm) :: more)
12101218
<|> pure []
@@ -1218,7 +1226,7 @@ implBinds fname indents
12181226
symbol ":"
12191227
tm <- expr pdef fname indents
12201228
symbol "}"
1221-
symbol "->"
1229+
rightArrow
12221230
more <- implBinds fname indents
12231231
pure ((n, rig, tm) :: more)
12241232
<|> pure []

src/Parser/Lexer.idr

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -110,21 +110,54 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
110110
docComment : Lexer
111111
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')
112112

113+
--
114+
-- Symbols
115+
--
116+
117+
export
118+
isOpChar : Char -> Bool
119+
isOpChar c = inLatin || inArrows || inMathematicalOperators
120+
where
121+
inRange : (Int, Int) -> Lazy Bool
122+
inRange (lowerBound, upperBound) = (c >= chr lowerBound && c <= chr upperBound)
123+
inLatin = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
124+
inArrows = inRange (8592, 8703)
125+
inMathematicalOperators = inRange (8704, 8959)
126+
127+
nonOpCharUnicode : Char -> Bool
128+
nonOpCharUnicode c = (c > chr 160) && not (isOpChar c)
129+
130+
131+
validSymbol : Lexer
132+
validSymbol = some (pred isOpChar)
133+
134+
||| Special symbols - things which can't be a prefix of another symbol, and
135+
||| don't match 'validSymbol'
136+
export
137+
symbols : List String
138+
symbols
139+
= [".(", -- for things such as Foo.Bar.(+)
140+
"@{",
141+
"[|", "|]",
142+
"(", ")", "{", "}", "[", "]", ",", ";", "_",
143+
"`(", "`"]
144+
145+
--
113146
-- Identifier Lexer
114147
-- There are multiple variants.
115148

116149
data Flavour = Capitalised | AllowDashes | Normal
117150

118151
isIdentStart : Flavour -> Char -> Bool
119152
isIdentStart _ '_' = True
120-
isIdentStart Capitalised x = isUpper x || x > chr 160
121-
isIdentStart _ x = isAlpha x || x > chr 160
153+
isIdentStart Capitalised c = isUpper c || nonOpCharUnicode c
154+
isIdentStart _ c = isAlpha c || nonOpCharUnicode c
122155

123156
isIdentTrailing : Flavour -> Char -> Bool
124157
isIdentTrailing AllowDashes '-' = True
125158
isIdentTrailing _ '\'' = True
126159
isIdentTrailing _ '_' = True
127-
isIdentTrailing _ x = isAlphaNum x || x > chr 160
160+
isIdentTrailing _ c = isAlphaNum c || nonOpCharUnicode c
128161

129162
%inline
130163
isIdent : Flavour -> String -> Bool
@@ -163,6 +196,10 @@ recField = is '.' <+> ident Normal
163196
pragma : Lexer
164197
pragma = is '%' <+> ident Normal
165198

199+
--
200+
-- Literal Lexers
201+
--
202+
166203
doubleLit : Lexer
167204
doubleLit
168205
= digits <+> is '.' <+> digits <+> opt
@@ -197,32 +234,13 @@ keywords = ["data", "module", "where", "let", "in", "do", "record",
197234
special : List String
198235
special = ["%lam", "%pi", "%imppi", "%let"]
199236

200-
-- Special symbols - things which can't be a prefix of another symbol, and
201-
-- don't match 'validSymbol'
202-
export
203-
symbols : List String
204-
symbols
205-
= [".(", -- for things such as Foo.Bar.(+)
206-
"@{",
207-
"[|", "|]",
208-
"(", ")", "{", "}", "[", "]", ",", ";", "_",
209-
"`(", "`"]
210-
211-
212-
export
213-
isOpChar : Char -> Bool
214-
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
215-
216-
validSymbol : Lexer
217-
validSymbol = some (pred isOpChar)
218-
219237
-- Valid symbols which have a special meaning so can't be operators
220238
export
221239
reservedSymbols : List String
222240
reservedSymbols
223241
= symbols ++
224-
["%", "\\", ":", "=", "|", "|||", "<-", "->", "=>", "?", "!",
225-
"&", "**", ".."]
242+
["%", "\\", ":", "=", "|", "|||", "?", "!", "&", "**",
243+
"..", "<-", "->", "=>", "", "", ""]
226244

227245
fromHexLit : String -> Integer
228246
fromHexLit str

tests/Main.idr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ idrisTests
5151
"literate001", "literate002", "literate003", "literate004",
5252
"literate005", "literate006", "literate007", "literate008",
5353
"literate009", "literate010", "literate011", "literate012",
54+
-- Unicode
55+
"unicode001",
5456
-- Interfaces
5557
"interface001", "interface002", "interface003", "interface004",
5658
"interface005", "interface006", "interface007", "interface008",
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module Unicode
2+
3+
infix 6 ≡, ≢
4+
5+
public export
6+
interface UnicodeEq a where
7+
(≡) : a → a → Bool
8+
(≢) : a → a → Bool
9+

tests/idris2/unicode001/expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1/1: Building Unicode (Unicode.idr)

tests/idris2/unicode001/run

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
$1 -c Unicode.idr
2+
3+
rm -rf build/

0 commit comments

Comments
 (0)