Skip to content

Commit be596db

Browse files
committed
Use decode('unicode-escape') to unescape Unicode sequences
1 parent cd69824 commit be596db

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

fluent/syntax/parser.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,6 @@
44
from .stream import EOF, EOL, FluentParserStream
55
from .errors import ParseError
66

7-
try:
8-
from __builtin__ import unichr as chr
9-
except ImportError:
10-
pass
11-
12-
137

148
def with_span(fn):
159
def decorated(self, ps, *args, **kwargs):
@@ -593,8 +587,11 @@ def get_unicode_escape_sequence(self, ps, u, digits):
593587

594588
codepoint = int(sequence, 16)
595589
if codepoint <= 0xD7FF or 0xE000 <= codepoint:
596-
# It's a Unicode scalar value.
597-
unescaped = chr(codepoint)
590+
# It's a Unicode scalar value. The escape sequence is 4 or 6 digits
591+
# long. Convert it to an 8-digit-long \UHHHHHHHH sequence and encode
592+
# it as bytes, because in Python 3 decode is not available on str.
593+
byte_sequence = "\\U{:08x}".format(codepoint).encode('utf-8')
594+
unescaped = byte_sequence.decode('unicode-escape')
598595
else:
599596
# Escape sequences representing surrogate code points are
600597
# well-formed but invalid in Fluent. Replace them with U+FFFD

0 commit comments

Comments
 (0)