diff --git a/Changelog.md b/Changelog.md index 7636b5fd..ef263ff8 100644 --- a/Changelog.md +++ b/Changelog.md @@ -17,6 +17,10 @@ ### Bug Fixes +- [#843]: `xs:list` deserialization now splits items on any whitespace character: ` `, `\r`, `\t`, or `\n`. + +[#843]: https://github.com/tafia/quick-xml/pull/843 + ### Misc Changes diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs index fefa9e97..6d4fe2ec 100644 --- a/src/de/simple_type.rs +++ b/src/de/simple_type.rs @@ -8,7 +8,6 @@ use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::escape::unescape; use crate::utils::CowRef; -use memchr::memchr; use serde::de::value::UnitDeserializer; use serde::de::{ DeserializeSeed, Deserializer, EnumAccess, IntoDeserializer, SeqAccess, VariantAccess, Visitor, @@ -361,14 +360,19 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { T: DeserializeSeed<'de>, { if let Some(mut content) = self.content.take() { - const DELIMITER: u8 = b' '; + // NOTE: once normalization is implemented, it may be enough + // to check only b' ', because all whitespace will be normalized + const DELIMETERS: &str = " \t\r\n"; loop { let string = content.as_str(); if string.is_empty() { return Ok(None); } - return match memchr(DELIMITER, string.as_bytes()) { + + let first_delimiter = string.find(|c| DELIMETERS.contains(c)); + + return match first_delimiter { // No delimiters in the `content`, deserialize it as a whole atomic None => match content { Content::Input(s) => seed.deserialize(AtomicDeserializer { @@ -391,7 +395,7 @@ impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { // `content` started with a space, skip them all Some(0) => { // Skip all spaces - let start = string.as_bytes().iter().position(|ch| *ch != DELIMITER); + let start = string.find(|c| !DELIMETERS.contains(c)); content = match (start, content) { // We cannot find any non-space character, so string contains only spaces (None, _) => return Ok(None), @@ -1168,6 +1172,23 @@ mod tests { 
assert_eq!(seq.next_element::<()>().unwrap(), None); assert_eq!(seq.next_element::<()>().unwrap(), None); } + + #[test] + fn mixed_whitespace_delimiters() { + let mut seq = ListIter { + content: Some(Content::Input("one two\nthree\rfour\tfive six")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), Some("one")); + assert_eq!(seq.next_element::<&str>().unwrap(), Some("two")); + assert_eq!(seq.next_element::<&str>().unwrap(), Some("three")); + assert_eq!(seq.next_element::<&str>().unwrap(), Some("four")); + assert_eq!(seq.next_element::<&str>().unwrap(), Some("five")); + assert_eq!(seq.next_element::<&str>().unwrap(), Some("six")); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + } } mod utf8 {