Skip to content

Commit 56d0acc

Browse files
authored
Merge pull request #533 from meh/fix-utf8
Fix one_of, none_of, and char when encountering multi-byte UTF-8 chars
2 parents 181fce3 + 0640736 commit 56d0acc

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

src/character.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ macro_rules! one_of (
1313
use $crate::InputIter;
1414

1515
match ($i).iter_elements().next().map(|c| {
16-
c.find_token($inp)
16+
(c, c.find_token($inp))
1717
}) {
18-
None => $crate::IResult::Incomplete::<_, _>($crate::Needed::Size(1)),
19-
Some(false) => $crate::IResult::Error(error_position!($crate::ErrorKind::OneOf, $i)),
18+
None => $crate::IResult::Incomplete::<_, _>($crate::Needed::Size(1)),
19+
Some((_, false)) => $crate::IResult::Error(error_position!($crate::ErrorKind::OneOf, $i)),
2020
//the unwrap should be safe here
21-
Some(true) => $crate::IResult::Done($i.slice(1..), $i.iter_elements().next().unwrap().as_char())
21+
Some((c, true)) => $crate::IResult::Done($i.slice(c.len()..), $i.iter_elements().next().unwrap().as_char())
2222
}
2323
}
2424
);
@@ -35,12 +35,12 @@ macro_rules! none_of (
3535
use $crate::InputIter;
3636

3737
match ($i).iter_elements().next().map(|c| {
38-
!c.find_token($inp)
38+
(c, !c.find_token($inp))
3939
}) {
40-
None => $crate::IResult::Incomplete::<_, _>($crate::Needed::Size(1)),
41-
Some(false) => $crate::IResult::Error(error_position!($crate::ErrorKind::NoneOf, $i)),
40+
None => $crate::IResult::Incomplete::<_, _>($crate::Needed::Size(1)),
41+
Some((_, false)) => $crate::IResult::Error(error_position!($crate::ErrorKind::NoneOf, $i)),
4242
//the unwrap should be safe here
43-
Some(true) => $crate::IResult::Done($i.slice(1..), $i.iter_elements().next().unwrap().as_char())
43+
Some((c, true)) => $crate::IResult::Done($i.slice(c.len()..), $i.iter_elements().next().unwrap().as_char())
4444
}
4545
}
4646
);
@@ -56,12 +56,12 @@ macro_rules! char (
5656
use $crate::InputIter;
5757

5858
match ($i).iter_elements().next().map(|c| {
59-
c.as_char() == $c
59+
(c, c.as_char() == $c)
6060
}) {
61-
None => $crate::IResult::Incomplete::<_, _>($crate::Needed::Size(1)),
62-
Some(false) => $crate::IResult::Error(error_position!($crate::ErrorKind::Char, $i)),
61+
None => $crate::IResult::Incomplete::<_, _>($crate::Needed::Size(1)),
62+
Some((_, false)) => $crate::IResult::Error(error_position!($crate::ErrorKind::Char, $i)),
6363
//the unwrap should be safe here
64-
Some(true) => $crate::IResult::Done($i.slice(1..), $i.iter_elements().next().unwrap().as_char())
64+
Some((c, true)) => $crate::IResult::Done($i.slice(c.len()..), $i.iter_elements().next().unwrap().as_char())
6565
}
6666
}
6767
);
@@ -93,6 +93,12 @@ mod tests {
9393

9494
let b = &b"cde"[..];
9595
assert_eq!(f(b), Error(error_position!(ErrorKind::OneOf, b)));
96+
97+
named!(utf8(&str) -> char,
98+
one_of!("+\u{FF0B}"));
99+
100+
assert!(utf8("+").is_done());
101+
assert!(utf8("\u{FF0B}").is_done());
96102
}
97103

98104
#[test]

src/traits.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ pub trait AsChar {
7070
/// tests that self is an octal digit
7171
#[inline]
7272
fn is_oct_digit(self) -> bool;
73+
/// returns the length of self in bytes
74+
#[inline]
75+
fn len(self) -> usize;
7376
}
7477

7578
impl AsChar for u8 {
@@ -95,6 +98,10 @@ impl AsChar for u8 {
9598
fn is_oct_digit(self) -> bool {
9699
self >= 0x30 && self <= 0x37
97100
}
101+
#[inline]
102+
fn len(self) -> usize {
103+
1
104+
}
98105
}
99106
impl<'a> AsChar for &'a u8 {
100107
#[inline]
@@ -119,6 +126,10 @@ impl<'a> AsChar for &'a u8 {
119126
fn is_oct_digit(self) -> bool {
120127
*self >= 0x30 && *self <= 0x37
121128
}
129+
#[inline]
130+
fn len(self) -> usize {
131+
1
132+
}
122133
}
123134

124135
impl AsChar for char {
@@ -134,6 +145,8 @@ impl AsChar for char {
134145
fn is_hex_digit(self) -> bool { self.is_digit(16) }
135146
#[inline]
136147
fn is_oct_digit(self) -> bool { self.is_digit(8) }
148+
#[inline]
149+
fn len(self) -> usize { self.len_utf8() }
137150
}
138151

139152
impl<'a> AsChar for &'a char {
@@ -149,6 +162,8 @@ impl<'a> AsChar for &'a char {
149162
fn is_hex_digit(self) -> bool { self.is_digit(16) }
150163
#[inline]
151164
fn is_oct_digit(self) -> bool { self.is_digit(8) }
165+
#[inline]
166+
fn len(self) -> usize { self.len_utf8() }
152167
}
153168

154169
/// abstracts common iteration operations on the input type

tests/issues.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,3 @@ fn issue_302(input: &[u8]) -> IResult<&[u8], Option<Vec<u64>> > {
155155
( entries )
156156
)
157157
}
158-

0 commit comments

Comments
 (0)