Skip to content

Commit 81936c3

Browse files
committed
Make trimming more consistent with the regular reader API
1 parent 56246ea commit 81936c3

File tree

3 files changed

+30
-33
lines changed

3 files changed

+30
-33
lines changed

src/de/mod.rs

Lines changed: 17 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2169,31 +2169,6 @@ struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolve
21692169
entity_resolver: E,
21702170
}
21712171

2172-
fn trim_cow<'a, F>(value: Cow<'a, str>, trim: F) -> Cow<'a, str>
2173-
where
2174-
F: FnOnce(&str) -> &str,
2175-
{
2176-
match value {
2177-
Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
2178-
Cow::Owned(mut bytes) => {
2179-
let trimmed = trim(&bytes);
2180-
if trimmed.len() != bytes.len() {
2181-
bytes = trimmed.to_string();
2182-
}
2183-
Cow::Owned(bytes)
2184-
}
2185-
}
2186-
}
2187-
2188-
/// Removes trailing XML whitespace bytes from text content.
2189-
///
2190-
/// Returns `true` if content is empty after that
2191-
fn inplace_trim_end(mut s: &mut Cow<str>) -> bool {
2192-
let c: Cow<str> = replace(&mut s, Cow::Borrowed(""));
2193-
*s = trim_cow(c, str::trim_end);
2194-
s.is_empty()
2195-
}
2196-
21972172
impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21982173
fn new(mut reader: R, entity_resolver: E) -> Self {
21992174
// Lookahead by one event immediately, so we do not need to check in the
@@ -2365,6 +2340,16 @@ where
23652340
T::deserialize(&mut de)
23662341
}
23672342

2343+
/// Deserialize from a custom reader.
2344+
pub fn from_custom_reader<R, T>(reader: Reader<R>) -> Result<T, DeError>
2345+
where
2346+
R: BufRead,
2347+
T: DeserializeOwned,
2348+
{
2349+
let mut de = Deserializer::from_custom_reader(reader);
2350+
T::deserialize(&mut de)
2351+
}
2352+
23682353
// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
23692354
// valid boolean representations are only "true", "false", "1", and "0"
23702355
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
@@ -2871,8 +2856,6 @@ where
28712856
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
28722857
let mut reader = Reader::from_str(source);
28732858
let config = reader.config_mut();
2874-
config.trim_text_start = true;
2875-
config.trim_text_end = true;
28762859
config.expand_empty_elements = true;
28772860

28782861
Self::new(
@@ -3125,7 +3108,7 @@ impl StartTrimmer {
31253108
/// Converts raw reader's event into a payload event.
31263109
/// Returns `None`, if event should be skipped.
31273110
#[inline(always)]
3128-
fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
3111+
fn trim<'a>(&mut self, event: Event<'a>, trim_text_start: bool) -> Option<PayloadEvent<'a>> {
31293112
let (event, trim_next_event) = match event {
31303113
Event::DocType(e) => (PayloadEvent::DocType(e), true),
31313114
Event::Start(e) => (PayloadEvent::Start(e), true),
@@ -3136,7 +3119,8 @@ impl StartTrimmer {
31363119
Event::CData(e) => (PayloadEvent::CData(e), false),
31373120
Event::Text(mut e) => {
31383121
// If event is empty after trimming, skip it
3139-
if self.trim_start && e.inplace_trim_start() {
3122+
// Or if event is all white space, skip it regardless of trimming settings
3123+
if (trim_text_start && self.trim_start && e.inplace_trim_start()) || e.is_all_whitespace() {
31403124
return None;
31413125
}
31423126
(PayloadEvent::Text(e), false)
@@ -3229,8 +3213,9 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
32293213
loop {
32303214
self.buf.clear();
32313215

3216+
let trim_text_start = self.reader.config().trim_text_start;
32323217
let event = self.reader.read_event_into(&mut self.buf)?;
3233-
if let Some(event) = self.start_trimmer.trim(event) {
3218+
if let Some(event) = self.start_trimmer.trim(event, trim_text_start) {
32343219
return Ok(event.into_owned());
32353220
}
32363221
}
@@ -3299,7 +3284,7 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
32993284
fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
33003285
loop {
33013286
let event = self.reader.read_event()?;
3302-
if let Some(event) = self.start_trimmer.trim(event) {
3287+
if let Some(event) = self.start_trimmer.trim(event, self.config().trim_text_start) {
33033288
return Ok(event);
33043289
}
33053290
}
@@ -4477,7 +4462,7 @@ mod tests {
44774462
fn start() {
44784463
let mut de = make_de(" text <tag1><tag2>");
44794464
// Text is no longer trimmed here: surrounding whitespace is preserved
4480-
assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4465+
assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
44814466
assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
44824467
assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
44834468
assert_eq!(de.next().unwrap(), DeEvent::Eof);

src/events/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use crate::escape::{
5353
use crate::name::{LocalName, QName};
5454
#[cfg(feature = "serialize")]
5555
use crate::utils::CowRef;
56-
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string};
56+
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, is_whitespace};
5757
use attributes::{Attribute, Attributes};
5858

5959
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
@@ -622,6 +622,11 @@ impl<'a> BytesText<'a> {
622622
self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
623623
self.content.is_empty()
624624
}
625+
626+
/// Returns `true` if all characters are whitespace characters.
627+
pub fn is_all_whitespace(&mut self) -> bool {
628+
self.content.iter().all(|&x| is_whitespace(x))
629+
}
625630
}
626631

627632
impl<'a> Debug for BytesText<'a> {

tests/reader.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ small_buffers_tests!(
1515
read_event_into: std::io::BufReader<_>
1616
);
1717

18+
/// Text that is not enclosed in any element is expected to come back from the
/// reader verbatim, with its leading and trailing spaces intact.
#[test]
fn test_text() {
    let mut reader = Reader::from_str(" text ");
    let expected = Text(BytesText::new(" text "));

    assert_eq!(reader.read_event().unwrap(), expected);
}
24+
1825
#[test]
1926
fn test_start_end() {
2027
let mut r = Reader::from_str("<a></a>");

0 commit comments

Comments
 (0)