@@ -2056,6 +2056,32 @@ impl<'a> From<&'a str> for Text<'a> {
2056
2056
}
2057
2057
}
2058
2058
2059
/// Undecoded binary content of an XML text node.
///
/// This is the payload of [`DeEvent::Binary`]. The bytes are stored exactly as
/// they were handed over: no unescaping and no character decoding is applied
/// to them by this type.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Binary<'a> {
    /// The raw bytes of the content, either borrowed for `'a` or owned.
    pub text: Cow<'a, [u8]>,
}

impl<'a> Deref for Binary<'a> {
    type Target = [u8];

    /// Exposes the stored content as a plain byte slice.
    #[inline]
    fn deref(&self) -> &Self::Target {
        // `&Cow<[u8]>` coerces to `&[u8]`, no explicit `.deref()` call needed
        &self.text
    }
}

impl<'a> From<&'a [u8]> for Binary<'a> {
    /// Wraps a borrowed byte slice without copying it.
    #[inline]
    fn from(text: &'a [u8]) -> Self {
        Self {
            text: Cow::Borrowed(text),
        }
    }
}
2084
+
2059
2085
////////////////////////////////////////////////////////////////////////////////////////////////////
2060
2086
2061
2087
/// Simplified event which contains only those variants that are used by the deserializer
@@ -2074,6 +2100,8 @@ pub enum DeEvent<'a> {
2074
2100
/// [`Comment`]: Event::Comment
2075
2101
/// [`PI`]: Event::PI
2076
2102
Text ( Text < ' a > ) ,
2103
+ /// Binary undecoded
2104
+ Binary ( Binary < ' a > ) ,
2077
2105
/// End of XML document.
2078
2106
Eof ,
2079
2107
}
@@ -2217,7 +2245,11 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2217
2245
// FIXME: Actually, we should trim after decoding text, but now we trim before
2218
2246
continue ;
2219
2247
}
2220
- self . drain_text ( e. unescape_with ( |entity| self . entity_resolver . resolve ( entity) ) ?)
2248
+ match e. unescape_with ( |entity| self . entity_resolver . resolve ( entity) ) . map ( |res| self . drain_text ( res) ) {
2249
+ Ok ( x) => x,
2250
+ // Failed to unescape: treat the content as a binary blob.
2251
+ Err ( _) => Ok ( DeEvent :: Binary ( Binary { text : e. into_inner ( ) } ) ) ,
2252
+ }
2221
2253
}
2222
2254
PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
2223
2255
PayloadEvent :: DocType ( e) => {
@@ -2687,6 +2719,8 @@ where
2687
2719
fn read_string_impl ( & mut self , allow_start : bool ) -> Result < Cow < ' de , str > , DeError > {
2688
2720
match self . next ( ) ? {
2689
2721
DeEvent :: Text ( e) => Ok ( e. text ) ,
2722
+ // SAFETY: Binary event should never be emitted for decoded strings.
2723
+ DeEvent :: Binary ( e) => unreachable ! ( "{:?}" , e) ,
2690
2724
// allow one nested level
2691
2725
DeEvent :: Start ( e) if allow_start => self . read_text ( e. name ( ) ) ,
2692
2726
DeEvent :: Start ( e) => Err ( DeError :: UnexpectedStart ( e. name ( ) . as_ref ( ) . to_owned ( ) ) ) ,
@@ -2708,10 +2742,12 @@ where
2708
2742
// The matching tag name is guaranteed by the reader
2709
2743
DeEvent :: End ( _) => Ok ( e. text ) ,
2710
2744
// SAFETY: There cannot be two consecutive Text events; they would be merged into one
2711
- DeEvent :: Text ( _) => unreachable ! ( ) ,
2745
+ DeEvent :: Text ( _) | DeEvent :: Binary ( _ ) => unreachable ! ( ) ,
2712
2746
DeEvent :: Start ( e) => Err ( DeError :: UnexpectedStart ( e. name ( ) . as_ref ( ) . to_owned ( ) ) ) ,
2713
2747
DeEvent :: Eof => Err ( Error :: missed_end ( name, self . reader . decoder ( ) ) . into ( ) ) ,
2714
2748
} ,
2749
+ // SAFETY: Binary event should never be emitted for decoded strings.
2750
+ DeEvent :: Binary ( e) => unreachable ! ( "{:?}" , e) ,
2715
2751
// We can get End event in case of `<tag></tag>` or `<tag/>` input
2716
2752
// Return empty text in that case
2717
2753
// The matching tag name is guaranteed by the reader
@@ -2827,6 +2863,30 @@ where
2827
2863
}
2828
2864
}
2829
2865
2866
+ impl < ' de , R > Deserializer < ' de , IoReader < R > >
2867
+ where
2868
+ R : BufRead ,
2869
+ {
2870
+ /// Create new deserializer that will copy data from the specified reader
2871
+ /// into internal buffer.
2872
+ ///
2873
+ /// If you already have a string use [`Self::from_str`] instead, because it
2874
+ /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2875
+ /// UTF-8, you can decode it first before using [`from_str`].
2876
+ ///
2877
+ /// Deserializer created with this method will not resolve custom entities.
2878
+ pub fn from_custom_reader ( reader : Reader < R > ) -> Self {
2879
+ Self :: new (
2880
+ IoReader {
2881
+ reader,
2882
+ start_trimmer : StartTrimmer :: default ( ) ,
2883
+ buf : Vec :: new ( ) ,
2884
+ } ,
2885
+ PredefinedEntityResolver
2886
+ )
2887
+ }
2888
+ }
2889
+
2830
2890
impl < ' de , R , E > Deserializer < ' de , IoReader < R > , E >
2831
2891
where
2832
2892
R : BufRead ,
@@ -2884,6 +2944,10 @@ where
2884
2944
Cow :: Borrowed ( s) => visitor. visit_borrowed_str ( s) ,
2885
2945
Cow :: Owned ( s) => visitor. visit_string ( s) ,
2886
2946
} ,
2947
+ DeEvent :: Binary ( e) => match e. text {
2948
+ Cow :: Borrowed ( s) => visitor. visit_borrowed_bytes ( s) ,
2949
+ Cow :: Owned ( s) => visitor. visit_byte_buf ( s) ,
2950
+ } ,
2887
2951
DeEvent :: Eof => Err ( DeError :: UnexpectedEof ) ,
2888
2952
}
2889
2953
}
@@ -2914,7 +2978,7 @@ where
2914
2978
self . read_to_end ( s. name ( ) ) ?;
2915
2979
visitor. visit_unit ( )
2916
2980
}
2917
- DeEvent :: Text ( _) => visitor. visit_unit ( ) ,
2981
+ DeEvent :: Text ( _) | DeEvent :: Binary ( _ ) => visitor. visit_unit ( ) ,
2918
2982
// SAFETY: The reader is guaranteed that we don't have unmatched tags
2919
2983
// If we are here, then our deserializer has a bug
2920
2984
DeEvent :: End ( e) => unreachable ! ( "{:?}" , e) ,
0 commit comments