@@ -2005,7 +2005,7 @@ use crate::{
2005
2005
errors:: Error ,
2006
2006
events:: { BytesCData , BytesEnd , BytesStart , BytesText , Event } ,
2007
2007
name:: QName ,
2008
- reader:: Reader ,
2008
+ reader:: { Config , Reader } ,
2009
2009
} ;
2010
2010
use serde:: de:: { self , Deserialize , DeserializeOwned , DeserializeSeed , SeqAccess , Visitor } ;
2011
2011
use std:: borrow:: Cow ;
@@ -2169,6 +2169,31 @@ struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolve
2169
2169
entity_resolver : E ,
2170
2170
}
2171
2171
2172
/// Applies `trim` to the text held by `value` while preserving the `Cow` variant.
///
/// A borrowed value is simply re-sliced. An owned value is replaced by a new
/// `String` only when `trim` returned text of a different length; otherwise the
/// original allocation is handed back untouched.
fn trim_cow<'a, F>(value: Cow<'a, str>, trim: F) -> Cow<'a, str>
where
    F: FnOnce(&str) -> &str,
{
    // Borrowed input never allocates: just narrow the slice and leave.
    let owned = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(owned) => owned,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was cut off, so the existing buffer can be reused as is.
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_string())
    }
}
2187
+
2188
+ /// Removes trailing XML whitespace bytes from text content.
2189
+ ///
2190
+ /// Returns `true` if content is empty after that
2191
+ fn inplace_trim_end ( mut s : & mut Cow < str > ) -> bool {
2192
+ let c: Cow < str > = replace ( & mut s, Cow :: Borrowed ( "" ) ) ;
2193
+ * s = trim_cow ( c, str:: trim_end) ;
2194
+ s. is_empty ( )
2195
+ }
2196
+
2172
2197
impl < ' i , R : XmlRead < ' i > , E : EntityResolver > XmlReader < ' i , R , E > {
2173
2198
fn new ( mut reader : R , entity_resolver : E ) -> Self {
2174
2199
// Lookahead by one event immediately, so we do not need to check in the
@@ -2206,20 +2231,23 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2206
2231
/// Read all consecutive [`Text`] and [`CData`] events until non-text event
2207
2232
/// occurs. Content of all events would be appended to `result` and returned
2208
2233
/// as [`DeEvent::Text`].
2234
+ ///
2235
+ /// If the resulting text is empty, this function returns `None` to avoid creating an empty event.
2209
2236
///
2210
2237
/// [`Text`]: PayloadEvent::Text
2211
2238
/// [`CData`]: PayloadEvent::CData
2212
- fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < DeEvent < ' i > , DeError > {
2239
+ fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < Option < DeEvent < ' i > > , DeError > {
2213
2240
loop {
2214
2241
if self . current_event_is_last_text ( ) {
2215
2242
break ;
2216
2243
}
2217
-
2218
2244
match self . next_impl ( ) ? {
2219
2245
PayloadEvent :: Text ( mut e) => {
2220
2246
if self . current_event_is_last_text ( ) {
2221
2247
// FIXME: Actually, we should trim after decoding text, but now we trim before
2222
- e. inplace_trim_end ( ) ;
2248
+ if self . reader . config ( ) . trim_text_end {
2249
+ e. inplace_trim_end ( ) ;
2250
+ }
2223
2251
}
2224
2252
result
2225
2253
. to_mut ( )
@@ -2228,10 +2256,12 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2228
2256
PayloadEvent :: CData ( e) => result. to_mut ( ) . push_str ( & e. decode ( ) ?) ,
2229
2257
2230
2258
// SAFETY: current_event_is_last_text checks that event is Text or CData
2231
- _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2259
+ e => {
2260
+ unreachable ! ( "Only `Text` and `CData` events can come here: {:?}" , & e) ;
2261
+ }
2232
2262
}
2233
2263
}
2234
- Ok ( DeEvent :: Text ( Text { text : result } ) )
2264
+ Ok ( Some ( DeEvent :: Text ( Text { text : result } ) ) )
2235
2265
}
2236
2266
2237
2267
/// Return an input-borrowing event.
@@ -2241,17 +2271,24 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2241
2271
PayloadEvent :: Start ( e) => Ok ( DeEvent :: Start ( e) ) ,
2242
2272
PayloadEvent :: End ( e) => Ok ( DeEvent :: End ( e) ) ,
2243
2273
PayloadEvent :: Text ( mut e) => {
2244
- if self . current_event_is_last_text ( ) && e. inplace_trim_end ( ) {
2245
- // FIXME: Actually, we should trim after decoding text, but now we trim before
2246
- continue ;
2274
+ if self . current_event_is_last_text ( ) {
2275
+ if self . reader . config ( ) . trim_text_end && e. inplace_trim_end ( ) {
2276
+ continue ;
2277
+ }
2247
2278
}
2279
+
2248
2280
match e. unescape_with ( |entity| self . entity_resolver . resolve ( entity) ) . map ( |res| self . drain_text ( res) ) {
2249
- Ok ( x) => x,
2281
+ Ok ( Ok ( None ) ) => continue ,
2282
+ Ok ( Ok ( Some ( x) ) ) => Ok ( x) ,
2283
+ Ok ( Err ( x) ) => Err ( x) ,
2250
2284
// Unescaping failed: treat the content as a binary blob.
2251
2285
Err ( _) => Ok ( DeEvent :: Binary ( Binary { text : e. into_inner ( ) } ) ) ,
2252
2286
}
2253
2287
}
2254
- PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
2288
+ PayloadEvent :: CData ( e) => match self . drain_text ( e. decode ( ) ?) . transpose ( ) {
2289
+ None => continue ,
2290
+ Some ( x) => x,
2291
+ } ,
2255
2292
PayloadEvent :: DocType ( e) => {
2256
2293
self . entity_resolver
2257
2294
. capture ( e)
@@ -2834,6 +2871,8 @@ where
2834
2871
pub fn from_str_with_resolver ( source : & ' de str , entity_resolver : E ) -> Self {
2835
2872
let mut reader = Reader :: from_str ( source) ;
2836
2873
let config = reader. config_mut ( ) ;
2874
+ config. trim_text_start = true ;
2875
+ config. trim_text_end = true ;
2837
2876
config. expand_empty_elements = true ;
2838
2877
2839
2878
Self :: new (
@@ -3135,6 +3174,9 @@ pub trait XmlRead<'i> {
3135
3174
3136
3175
/// A copy of the reader's decoder used to decode strings.
3137
3176
fn decoder ( & self ) -> Decoder ;
3177
+
3178
+ /// Returns a reference to the reader config.
3179
+ fn config ( & self ) -> & Config ;
3138
3180
}
3139
3181
3140
3182
/// XML input source that reads from a std::io input stream.
@@ -3204,6 +3246,10 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3204
3246
/// Returns the decoder reported by the wrapped reader.
fn decoder(&self) -> Decoder {
    let inner = &self.reader;
    inner.decoder()
}
3249
+
3250
+ fn config ( & self ) -> & Config {
3251
+ self . reader . config ( )
3252
+ }
3207
3253
}
3208
3254
3209
3255
/// XML input source that reads from a slice of bytes and can borrow from it.
@@ -3269,6 +3315,10 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
3269
3315
/// Returns the decoder reported by the wrapped reader.
fn decoder(&self) -> Decoder {
    let inner = &self.reader;
    inner.decoder()
}
3318
+
3319
+ fn config ( & self ) -> & Config {
3320
+ self . reader . config ( )
3321
+ }
3272
3322
}
3273
3323
3274
3324
#[ cfg( test) ]
0 commit comments