Skip to content

Commit c938889

Browse files
committed
Add unicode operator support for Mathematical Operators and Arrows
1 parent 05298b4 commit c938889

File tree

6 files changed

+78
-37
lines changed

6 files changed

+78
-37
lines changed

src/Idris/Parser.idr

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ export
3939
plhs : ParseOpts
4040
plhs = MkParseOpts False False
4141

42+
leftArrow : Rule ()
43+
leftArrow = symbol "<-" <|> symbol ""
44+
45+
rightArrow : Rule ()
46+
rightArrow = symbol "->" <|> symbol ""
47+
48+
doubleRightArrow : Rule ()
49+
doubleRightArrow = symbol "=>" <|> symbol ""
50+
4251
atom : FileName -> Rule PTerm
4352
atom fname
4453
= do start <- location
@@ -418,12 +427,11 @@ mutual
418427

419428
bindSymbol : Rule (PiInfo PTerm)
420429
bindSymbol
421-
= do symbol "->"
430+
= do rightArrow
422431
pure Explicit
423-
<|> do symbol "=>"
432+
<|> do doubleRightArrow
424433
pure AutoImplicit
425434

426-
427435
explicitPi : FileName -> IndentInfo -> Rule PTerm
428436
explicitPi fname indents
429437
= do start <- location
@@ -443,7 +451,7 @@ mutual
443451
commit
444452
binders <- pibindList fname start indents
445453
symbol "}"
446-
symbol "->"
454+
rightArrow
447455
scope <- typeExpr pdef fname indents
448456
end <- location
449457
pure (pibindAll (MkFC fname start end) AutoImplicit binders scope)
@@ -457,7 +465,7 @@ mutual
457465
t <- simpleExpr fname indents
458466
binders <- pibindList fname start indents
459467
symbol "}"
460-
symbol "->"
468+
rightArrow
461469
scope <- typeExpr pdef fname indents
462470
end <- location
463471
pure (pibindAll (MkFC fname start end) (DefImplicit t) binders scope)
@@ -483,7 +491,7 @@ mutual
483491
symbol "{"
484492
binders <- pibindList fname start indents
485493
symbol "}"
486-
symbol "->"
494+
rightArrow
487495
scope <- typeExpr pdef fname indents
488496
end <- location
489497
pure (pibindAll (MkFC fname start end) Implicit binders scope)
@@ -493,7 +501,7 @@ mutual
493501
= do start <- location
494502
symbol "\\"
495503
binders <- bindList fname start indents
496-
symbol "=>"
504+
doubleRightArrow
497505
mustContinue indents Nothing
498506
scope <- expr pdef fname indents
499507
end <- location
@@ -596,7 +604,7 @@ mutual
596604

597605
caseRHS : FileName -> FilePos -> IndentInfo -> PTerm -> Rule PClause
598606
caseRHS fname start indents lhs
599-
= do symbol "=>"
607+
= do doubleRightArrow
600608
mustContinue indents Nothing
601609
rhs <- expr pdef fname indents
602610
atEnd indents
@@ -633,7 +641,7 @@ mutual
633641

634642
field : FileName -> IndentInfo -> Rule PFieldUpdate
635643
field fname indents
636-
= do path <- sepBy1 (symbol "->") unqualifiedName
644+
= do path <- sepBy1 rightArrow unqualifiedName
637645
upd <- (do symbol "="; pure PSetField)
638646
<|>
639647
(do symbol "$="; pure PSetFieldApp)
@@ -675,7 +683,7 @@ mutual
675683
-- If the name doesn't begin with a lower case letter, we should
676684
-- treat this as a pattern, so fail
677685
validPatternVar n
678-
symbol "<-"
686+
leftArrow
679687
val <- expr pdef fname indents
680688
atEnd indents
681689
end <- location
@@ -701,7 +709,7 @@ mutual
701709
(do atEnd indents
702710
end <- location
703711
pure [DoExp (MkFC fname start end) e])
704-
<|> (do symbol "<-"
712+
<|> (do leftArrow
705713
val <- expr pnowith fname indents
706714
alts <- block (patAlt fname)
707715
atEnd indents
@@ -1151,15 +1159,15 @@ getRight (Right v) = Just v
11511159
constraints : FileName -> IndentInfo -> EmptyRule (List (Maybe Name, PTerm))
11521160
constraints fname indents
11531161
= do tm <- appExpr pdef fname indents
1154-
symbol "=>"
1162+
doubleRightArrow
11551163
more <- constraints fname indents
11561164
pure ((Nothing, tm) :: more)
11571165
<|> do symbol "("
11581166
n <- name
11591167
symbol ":"
11601168
tm <- expr pdef fname indents
11611169
symbol ")"
1162-
symbol "=>"
1170+
doubleRightArrow
11631171
more <- constraints fname indents
11641172
pure ((Just n, tm) :: more)
11651173
<|> pure []
@@ -1173,7 +1181,7 @@ implBinds fname indents
11731181
symbol ":"
11741182
tm <- expr pdef fname indents
11751183
symbol "}"
1176-
symbol "->"
1184+
rightArrow
11771185
more <- implBinds fname indents
11781186
pure ((n, rig, tm) :: more)
11791187
<|> pure []

src/Parser/Lexer.idr

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ export
4040
Show (TokenData Token) where
4141
show t = show (line t, col t, tok t)
4242

43+
--
44+
-- Comment Lexers
45+
--
46+
4347
comment : Lexer
4448
comment = is '-' <+> is '-' <+> many (isNot '\n')
4549

@@ -61,21 +65,50 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
6165
docComment : Lexer
6266
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')
6367

68+
--
69+
-- Symbols
70+
--
71+
72+
export
73+
isOpChar : Char -> Bool
74+
isOpChar c = inLatin || inArrows || inMathematicalOperators
75+
where
76+
inRange : (Int, Int) -> Lazy Bool
77+
inRange (lowerBound, upperBound) = (c >= chr lowerBound && c <= chr upperBound)
78+
inLatin = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
79+
inArrows = inRange (8592, 8703)
80+
inMathematicalOperators = inRange (8704, 8959)
81+
82+
validSymbol : Lexer
83+
validSymbol = some (pred isOpChar)
84+
85+
||| Special symbols - things which can't be a prefix of another symbol, and
86+
||| don't match 'validSymbol'
87+
export
88+
symbols : List String
89+
symbols
90+
= [".(", -- for things such as Foo.Bar.(+)
91+
"@{",
92+
"[|", "|]",
93+
"(", ")", "{", "}", "[", "]", ",", ";", "_",
94+
"`(", "`"]
95+
96+
--
6497
-- Identifier Lexer
6598
--
6699
-- There are two variants, a strict ident and a relaxed ident.
67100
-- Prime definitions recieve a boolean determining if it is relaxed.
68101

69102
startIdent : Char -> Bool
70103
startIdent '_' = True
71-
startIdent x = isAlpha x || x > chr 127
104+
startIdent c = isAlpha c || ((c > chr 127) && not (isOpChar c))
72105

73106
%inline
74107
validIdent' : Bool -> Char -> Bool
75108
validIdent' _ '_' = True
76109
validIdent' r '-' = r
77110
validIdent' _ '\'' = True
78-
validIdent' _ x = isAlphaNum x || x > chr 127
111+
validIdent' _ c = isAlphaNum c || ((c > chr 127) && not (isOpChar c))
79112

80113
%inline
81114
ident' : Bool -> Lexer
@@ -95,6 +128,10 @@ identRelaxed = ident' True
95128
holeIdent : Lexer
96129
holeIdent = is '?' <+> identStrict
97130

131+
--
132+
-- Literal Lexers
133+
--
134+
98135
doubleLit : Lexer
99136
doubleLit
100137
= digits <+> is '.' <+> digits <+> opt
@@ -129,32 +166,13 @@ keywords = ["data", "module", "where", "let", "in", "do", "record",
129166
special : List String
130167
special = ["%lam", "%pi", "%imppi", "%let"]
131168

132-
-- Special symbols - things which can't be a prefix of another symbol, and
133-
-- don't match 'validSymbol'
134-
export
135-
symbols : List String
136-
symbols
137-
= [".(", -- for things such as Foo.Bar.(+)
138-
"@{",
139-
"[|", "|]",
140-
"(", ")", "{", "}", "[", "]", ",", ";", "_",
141-
"`(", "`"]
142-
143-
144-
export
145-
isOpChar : Char -> Bool
146-
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
147-
148-
validSymbol : Lexer
149-
validSymbol = some (pred isOpChar)
150-
151169
-- Valid symbols which have a special meaning so can't be operators
152170
export
153171
reservedSymbols : List String
154172
reservedSymbols
155173
= symbols ++
156-
["%", "\\", ":", "=", "|", "|||", "<-", "->", "=>", "?", "!",
157-
"&", "**", ".."]
174+
["%", "\\", ":", "=", "|", "|||", "?", "!", "&", "**",
175+
"..", "<-", "->", "=>", "", "", ""]
158176

159177
fromHexLit : String -> Integer
160178
fromHexLit str

tests/Main.idr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ idrisTests
5151
"literate001", "literate002", "literate003", "literate004",
5252
"literate005", "literate006", "literate007", "literate008",
5353
"literate009",
54+
-- Unicode
55+
"unicode001",
5456
-- Interfaces
5557
"interface001", "interface002", "interface003", "interface004",
5658
"interface005", "interface006", "interface007", "interface008",
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module Unicode
2+
3+
infix 6 ≡, ≢
4+
5+
public export
6+
interface UnicodeEq a where
7+
(≡) : a → a → Bool
8+
(≢) : a → a → Bool
9+

tests/idris2/unicode001/expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1/1: Building Unicode (Unicode.idr)

tests/idris2/unicode001/run

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
$1 -c Unicode.idr
2+
3+
rm -rf build/

0 commit comments

Comments
 (0)