44use std:: ops:: Range ;
55use std:: str:: Chars ;
66
7+ use Mode :: * ;
8+
79#[ cfg( test) ]
810mod tests;
911
10- /// Errors and warnings that can occur during string unescaping.
12+ /// Errors and warnings that can occur during string unescaping. They mostly
13+ /// relate to malformed escape sequences, but there are a few that are about
14+ /// other problems.
1115#[ derive( Debug , PartialEq , Eq ) ]
1216pub enum EscapeError {
1317 /// Expected 1 char, but 0 were found.
@@ -73,25 +77,24 @@ impl EscapeError {
7377 }
7478}
7579
76- /// Takes a contents of a literal (without quotes) and produces a
77- /// sequence of escaped characters or errors.
78- /// Values are returned through invoking of the provided callback.
80+ /// Takes a contents of a literal (without quotes) and produces a sequence of
81+ /// escaped characters or errors.
82+ ///
83+ /// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
84+ /// the callback will be called exactly once.
7985pub fn unescape_literal < F > ( src : & str , mode : Mode , callback : & mut F )
8086where
8187 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
8288{
8389 match mode {
84- Mode :: Char | Mode :: Byte => {
90+ Char | Byte => {
8591 let mut chars = src. chars ( ) ;
86- let res = unescape_char_or_byte ( & mut chars, mode == Mode :: Byte ) ;
92+ let res = unescape_char_or_byte ( & mut chars, mode) ;
8793 callback ( 0 ..( src. len ( ) - chars. as_str ( ) . len ( ) ) , res) ;
8894 }
89- Mode :: Str | Mode :: ByteStr => unescape_str_common ( src, mode, callback) ,
90-
91- Mode :: RawStr | Mode :: RawByteStr => {
92- unescape_raw_str_or_raw_byte_str ( src, mode == Mode :: RawByteStr , callback)
93- }
94- Mode :: CStr | Mode :: RawCStr => unreachable ! ( ) ,
95+ Str | ByteStr => unescape_str_common ( src, mode, callback) ,
96+ RawStr | RawByteStr => unescape_raw_str_or_raw_byte_str ( src, mode, callback) ,
97+ CStr | RawCStr => unreachable ! ( ) ,
9598 }
9699}
97100
@@ -117,84 +120,87 @@ pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
117120where
118121 F : FnMut ( Range < usize > , Result < CStrUnit , EscapeError > ) ,
119122{
120- if mode == Mode :: RawCStr {
121- unescape_raw_str_or_raw_byte_str (
122- src,
123- mode. characters_should_be_ascii ( ) ,
124- & mut |r, result| callback ( r, result. map ( CStrUnit :: Char ) ) ,
125- ) ;
126- } else {
127- unescape_str_common ( src, mode, callback) ;
123+ match mode {
124+ CStr => {
125+ unescape_str_common ( src, mode, callback) ;
126+ }
127+ RawCStr => {
128+ unescape_raw_str_or_raw_byte_str ( src, mode, & mut |r, result| {
129+ callback ( r, result. map ( CStrUnit :: Char ) )
130+ } ) ;
131+ }
132+ Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable ! ( ) ,
128133 }
129134}
130135
131136/// Takes a contents of a char literal (without quotes), and returns an
132137/// unescaped char or an error.
133138pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
134- unescape_char_or_byte ( & mut src. chars ( ) , false )
139+ unescape_char_or_byte ( & mut src. chars ( ) , Char )
135140}
136141
137142/// Takes a contents of a byte literal (without quotes), and returns an
138143/// unescaped byte or an error.
139144pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
140- unescape_char_or_byte ( & mut src. chars ( ) , true ) . map ( byte_from_char)
145+ unescape_char_or_byte ( & mut src. chars ( ) , Byte ) . map ( byte_from_char)
141146}
142147
/// What kind of literal do we parse.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
    Char,

    Byte,

    Str,
    RawStr,

    ByteStr,
    RawByteStr,

    CStr,
    RawCStr,
}

impl Mode {
    /// Returns `true` for every string kind and `false` for `Char` and `Byte`.
    pub fn in_double_quotes(self) -> bool {
        match self {
            Self::Str
            | Self::RawStr
            | Self::ByteStr
            | Self::RawByteStr
            | Self::CStr
            | Self::RawCStr => true,
            Self::Char | Self::Byte => false,
        }
    }

    /// Non-byte literals should have `\xXX` escapes that are within the ASCII range.
    ///
    /// # Panics
    ///
    /// Panics for the raw modes: the question is only meaningful for modes in
    /// which escape sequences are processed.
    fn ascii_escapes_should_be_ascii(self) -> bool {
        match self {
            Self::Char | Self::Str => true,
            Self::Byte | Self::ByteStr | Self::CStr => false,
            Self::RawStr | Self::RawByteStr | Self::RawCStr => unreachable!(),
        }
    }

    /// Whether characters within the literal must be within the ASCII range.
    #[inline]
    fn chars_should_be_ascii(self) -> bool {
        match self {
            Self::Byte | Self::ByteStr | Self::RawByteStr => true,
            Self::Char | Self::Str | Self::RawStr | Self::CStr | Self::RawCStr => false,
        }
    }

    /// Byte literals do not allow unicode escape.
    fn is_unicode_escape_disallowed(self) -> bool {
        match self {
            Self::Byte | Self::ByteStr | Self::RawByteStr => true,
            Self::Char | Self::Str | Self::RawStr | Self::CStr | Self::RawCStr => false,
        }
    }

    /// Returns the literal prefix for this mode, ignoring rawness: `""` for
    /// the plain kinds, `"b"` for the byte kinds, and `"c"` for the C string
    /// kinds.
    pub fn prefix_noraw(self) -> &'static str {
        match self {
            Self::Char | Self::Str | Self::RawStr => "",
            Self::Byte | Self::ByteStr | Self::RawByteStr => "b",
            Self::CStr | Self::RawCStr => "c",
        }
    }
}
@@ -294,22 +300,21 @@ fn scan_unicode(
294300}
295301
296302#[ inline]
297- fn ascii_check ( c : char , characters_should_be_ascii : bool ) -> Result < char , EscapeError > {
298- if characters_should_be_ascii && !c. is_ascii ( ) {
299- // Byte literal can't be a non-ascii character.
303+ fn ascii_check ( c : char , chars_should_be_ascii : bool ) -> Result < char , EscapeError > {
304+ if chars_should_be_ascii && !c. is_ascii ( ) {
300305 Err ( EscapeError :: NonAsciiCharInByte )
301306 } else {
302307 Ok ( c)
303308 }
304309}
305310
306- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
311+ fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
307312 let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
308313 let res = match c {
309- '\\' => scan_escape ( chars, if is_byte { Mode :: Byte } else { Mode :: Char } ) ,
314+ '\\' => scan_escape ( chars, mode ) ,
310315 '\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
311316 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
312- _ => ascii_check ( c, is_byte ) ,
317+ _ => ascii_check ( c, mode . chars_should_be_ascii ( ) ) ,
313318 } ?;
314319 if chars. next ( ) . is_some ( ) {
315320 return Err ( EscapeError :: MoreThanOneChar ) ;
@@ -324,6 +329,7 @@ where
324329 F : FnMut ( Range < usize > , Result < T , EscapeError > ) ,
325330{
326331 let mut chars = src. chars ( ) ;
332+ let chars_should_be_ascii = mode. chars_should_be_ascii ( ) ; // get this outside the loop
327333
328334 // The `start` and `end` computation here is complicated because
329335 // `skip_ascii_whitespace` makes us to skip over chars without counting
@@ -346,14 +352,12 @@ where
346352 _ => scan_escape :: < T > ( & mut chars, mode) ,
347353 }
348354 }
349- '\n' => Ok ( b'\n' . into ( ) ) ,
350- '\t' => Ok ( b'\t' . into ( ) ) ,
351355 '"' => Err ( EscapeError :: EscapeOnlyChar ) ,
352356 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
353- _ => ascii_check ( c, mode . characters_should_be_ascii ( ) ) . map ( Into :: into) ,
357+ _ => ascii_check ( c, chars_should_be_ascii ) . map ( Into :: into) ,
354358 } ;
355359 let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
356- callback ( start..end, res. map ( Into :: into ) ) ;
360+ callback ( start..end, res) ;
357361 }
358362}
359363
@@ -387,20 +391,21 @@ where
387391/// sequence of characters or errors.
388392/// NOTE: Raw strings do not perform any explicit character escaping, here we
389393/// only produce errors on bare CR.
390- fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
394+ fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , mode : Mode , callback : & mut F )
391395where
392396 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
393397{
394398 let mut chars = src. chars ( ) ;
399+ let chars_should_be_ascii = mode. chars_should_be_ascii ( ) ; // get this outside the loop
395400
396401 // The `start` and `end` computation here matches the one in
397- // `unescape_str_or_byte_str ` for consistency, even though this function
402+ // `unescape_str_common ` for consistency, even though this function
398403 // doesn't have to worry about skipping any chars.
399404 while let Some ( c) = chars. next ( ) {
400405 let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
401406 let res = match c {
402407 '\r' => Err ( EscapeError :: BareCarriageReturnInRawString ) ,
403- _ => ascii_check ( c, is_byte ) ,
408+ _ => ascii_check ( c, chars_should_be_ascii ) ,
404409 } ;
405410 let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
406411 callback ( start..end, res) ;
@@ -410,7 +415,7 @@ where
/// Converts a `char` into the `u8` with the same numeric value.
///
/// The code point of `c` is expected to fit in a byte. This is asserted in
/// debug builds; without debug assertions a larger value is truncated by the
/// final cast.
#[inline]
pub fn byte_from_char(c: char) -> u8 {
    let res = c as u32;
    debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
    res as u8
}
416421
0 commit comments