Skip to content

Commit 923489e

Browse files
committed
Add unicode operator support for Mathematical Operators and Arrows
1 parent be8b157 commit 923489e

File tree

6 files changed

+74
-37
lines changed

6 files changed

+74
-37
lines changed

src/Idris/Parser.idr

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ export
3939
plhs : ParseOpts
4040
plhs = MkParseOpts False False
4141

42+
leftArrow : Rule ()
43+
leftArrow = symbol "<-" <|> symbol ""
44+
45+
rightArrow : Rule ()
46+
rightArrow = symbol "->" <|> symbol ""
47+
48+
doubleRightArrow : Rule ()
49+
doubleRightArrow = symbol "=>" <|> symbol ""
50+
4251
atom : FileName -> Rule PTerm
4352
atom fname
4453
= do start <- location
@@ -424,12 +433,11 @@ mutual
424433

425434
bindSymbol : Rule (PiInfo PTerm)
426435
bindSymbol
427-
= do symbol "->"
436+
= do rightArrow
428437
pure Explicit
429-
<|> do symbol "=>"
438+
<|> do doubleRightArrow
430439
pure AutoImplicit
431440

432-
433441
explicitPi : FileName -> IndentInfo -> Rule PTerm
434442
explicitPi fname indents
435443
= do start <- location
@@ -449,7 +457,7 @@ mutual
449457
commit
450458
binders <- pibindList fname start indents
451459
symbol "}"
452-
symbol "->"
460+
rightArrow
453461
scope <- typeExpr pdef fname indents
454462
end <- location
455463
pure (pibindAll (MkFC fname start end) AutoImplicit binders scope)
@@ -463,7 +471,7 @@ mutual
463471
t <- simpleExpr fname indents
464472
binders <- pibindList fname start indents
465473
symbol "}"
466-
symbol "->"
474+
rightArrow
467475
scope <- typeExpr pdef fname indents
468476
end <- location
469477
pure (pibindAll (MkFC fname start end) (DefImplicit t) binders scope)
@@ -489,7 +497,7 @@ mutual
489497
symbol "{"
490498
binders <- pibindList fname start indents
491499
symbol "}"
492-
symbol "->"
500+
rightArrow
493501
scope <- typeExpr pdef fname indents
494502
end <- location
495503
pure (pibindAll (MkFC fname start end) Implicit binders scope)
@@ -499,7 +507,7 @@ mutual
499507
= do start <- location
500508
symbol "\\"
501509
binders <- bindList fname start indents
502-
symbol "=>"
510+
doubleRightArrow
503511
mustContinue indents Nothing
504512
scope <- expr pdef fname indents
505513
end <- location
@@ -602,7 +610,7 @@ mutual
602610

603611
caseRHS : FileName -> FilePos -> IndentInfo -> PTerm -> Rule PClause
604612
caseRHS fname start indents lhs
605-
= do symbol "=>"
613+
= do doubleRightArrow
606614
mustContinue indents Nothing
607615
rhs <- expr pdef fname indents
608616
atEnd indents
@@ -639,7 +647,7 @@ mutual
639647

640648
field : FileName -> IndentInfo -> Rule PFieldUpdate
641649
field fname indents
642-
= do path <- sepBy1 (symbol "->") unqualifiedName
650+
= do path <- sepBy1 rightArrow unqualifiedName
643651
upd <- (do symbol "="; pure PSetField)
644652
<|>
645653
(do symbol "$="; pure PSetFieldApp)
@@ -681,7 +689,7 @@ mutual
681689
-- If the name doesn't begin with a lower case letter, we should
682690
-- treat this as a pattern, so fail
683691
validPatternVar n
684-
symbol "<-"
692+
leftArrow
685693
val <- expr pdef fname indents
686694
atEnd indents
687695
end <- location
@@ -707,7 +715,7 @@ mutual
707715
(do atEnd indents
708716
end <- location
709717
pure [DoExp (MkFC fname start end) e])
710-
<|> (do symbol "<-"
718+
<|> (do leftArrow
711719
val <- expr pnowith fname indents
712720
alts <- block (patAlt fname)
713721
atEnd indents
@@ -1157,15 +1165,15 @@ getRight (Right v) = Just v
11571165
constraints : FileName -> IndentInfo -> EmptyRule (List (Maybe Name, PTerm))
11581166
constraints fname indents
11591167
= do tm <- appExpr pdef fname indents
1160-
symbol "=>"
1168+
doubleRightArrow
11611169
more <- constraints fname indents
11621170
pure ((Nothing, tm) :: more)
11631171
<|> do symbol "("
11641172
n <- name
11651173
symbol ":"
11661174
tm <- expr pdef fname indents
11671175
symbol ")"
1168-
symbol "=>"
1176+
doubleRightArrow
11691177
more <- constraints fname indents
11701178
pure ((Just n, tm) :: more)
11711179
<|> pure []
@@ -1179,7 +1187,7 @@ implBinds fname indents
11791187
symbol ":"
11801188
tm <- expr pdef fname indents
11811189
symbol "}"
1182-
symbol "->"
1190+
rightArrow
11831191
more <- implBinds fname indents
11841192
pure ((n, rig, tm) :: more)
11851193
<|> pure []

src/Parser/Lexer.idr

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -99,21 +99,50 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
9999
docComment : Lexer
100100
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')
101101

102+
--
103+
-- Symbols
104+
--
105+
106+
export
107+
isOpChar : Char -> Bool
108+
isOpChar c = inLatin || inArrows || inMathematicalOperators
109+
where
110+
inRange : (Int, Int) -> Lazy Bool
111+
inRange (lowerBound, upperBound) = (c >= chr lowerBound && c <= chr upperBound)
112+
inLatin = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
113+
inArrows = inRange (8592, 8703)
114+
inMathematicalOperators = inRange (8704, 8959)
115+
116+
validSymbol : Lexer
117+
validSymbol = some (pred isOpChar)
118+
119+
||| Special symbols - things which can't be a prefix of another symbol, and
120+
||| don't match 'validSymbol'
121+
export
122+
symbols : List String
123+
symbols
124+
= [".(", -- for things such as Foo.Bar.(+)
125+
"@{",
126+
"[|", "|]",
127+
"(", ")", "{", "}", "[", "]", ",", ";", "_",
128+
"`(", "`"]
129+
130+
--
102131
-- Identifier Lexer
103132
--
104133
-- There are two variants, a strict ident and a relaxed ident.
105134
-- Prime definitions recieve a boolean determining if it is relaxed.
106135

107136
startIdent : Char -> Bool
108137
startIdent '_' = True
109-
startIdent x = isAlpha x || x > chr 127
138+
startIdent c = isAlpha c || ((c > chr 127) && not (isOpChar c))
110139

111140
%inline
112141
validIdent' : Bool -> Char -> Bool
113142
validIdent' _ '_' = True
114143
validIdent' r '-' = r
115144
validIdent' _ '\'' = True
116-
validIdent' _ x = isAlphaNum x || x > chr 127
145+
validIdent' _ c = isAlphaNum c || ((c > chr 127) && not (isOpChar c))
117146

118147
%inline
119148
ident' : Bool -> Lexer
@@ -134,6 +163,10 @@ identRelaxed = ident' True
134163
holeIdent : Lexer
135164
holeIdent = is '?' <+> identStrict
136165

166+
--
167+
-- Literal Lexers
168+
--
169+
137170
doubleLit : Lexer
138171
doubleLit
139172
= digits <+> is '.' <+> digits <+> opt
@@ -168,32 +201,13 @@ keywords = ["data", "module", "where", "let", "in", "do", "record",
168201
special : List String
169202
special = ["%lam", "%pi", "%imppi", "%let"]
170203

171-
-- Special symbols - things which can't be a prefix of another symbol, and
172-
-- don't match 'validSymbol'
173-
export
174-
symbols : List String
175-
symbols
176-
= [".(", -- for things such as Foo.Bar.(+)
177-
"@{",
178-
"[|", "|]",
179-
"(", ")", "{", "}", "[", "]", ",", ";", "_",
180-
"`(", "`"]
181-
182-
183-
export
184-
isOpChar : Char -> Bool
185-
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
186-
187-
validSymbol : Lexer
188-
validSymbol = some (pred isOpChar)
189-
190204
-- Valid symbols which have a special meaning so can't be operators
191205
export
192206
reservedSymbols : List String
193207
reservedSymbols
194208
= symbols ++
195-
["%", "\\", ":", "=", "|", "|||", "<-", "->", "=>", "?", "!",
196-
"&", "**", ".."]
209+
["%", "\\", ":", "=", "|", "|||", "?", "!", "&", "**",
210+
"..", "<-", "->", "=>", "", "", ""]
197211

198212
fromHexLit : String -> Integer
199213
fromHexLit str

tests/Main.idr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ idrisTests
5151
"literate001", "literate002", "literate003", "literate004",
5252
"literate005", "literate006", "literate007", "literate008",
5353
"literate009",
54+
-- Unicode
55+
"unicode001",
5456
-- Interfaces
5557
"interface001", "interface002", "interface003", "interface004",
5658
"interface005", "interface006", "interface007", "interface008",
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module Unicode
2+
3+
infix 6 ≡, ≢
4+
5+
public export
6+
interface UnicodeEq a where
7+
(≡) : a → a → Bool
8+
(≢) : a → a → Bool
9+

tests/idris2/unicode001/expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1/1: Building Unicode (Unicode.idr)

tests/idris2/unicode001/run

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
$1 -c Unicode.idr
2+
3+
rm -rf build/

0 commit comments

Comments
 (0)