Skip to content

Commit 47e2a69

Browse files
committed
Add support for undecodable binary text for serde
1 parent cdff285 commit 47e2a69

File tree

8 files changed

+2204
-9
lines changed

8 files changed

+2204
-9
lines changed

src/de/map.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,15 +247,15 @@ where
247247
// We shouldn't have both `$value` and `$text` fields in the same
248248
// struct, so if we have `$value` field, then we should deserialize
249249
// text content to `$value`
250-
DeEvent::Text(_) if self.has_value_field => {
250+
DeEvent::Text(_) | DeEvent::Binary(_) if self.has_value_field => {
251251
self.source = ValueSource::Content;
252252
// Deserialize `key` from special attribute name which means
253253
// that value should be taken from the text content of the
254254
// XML node
255255
let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
256256
seed.deserialize(de).map(Some)
257257
}
258-
DeEvent::Text(_) => {
258+
DeEvent::Text(_) | DeEvent::Binary(_)=> {
259259
self.source = ValueSource::Text;
260260
// Deserialize `key` from special attribute name which means
261261
// that value should be taken from the text content of the
@@ -943,6 +943,7 @@ where
943943
// SAFETY: we just checked that the next event is Text
944944
_ => unreachable!(),
945945
},
946+
DeEvent::Binary(_) => Err(Self::Error::Unsupported("undecodable binary data among a sequence of xml elements".into())),
946947
DeEvent::Start(_) => match self.map.de.next()? {
947948
DeEvent::Start(start) => seed
948949
.deserialize(ElementDeserializer {

src/de/mod.rs

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2056,6 +2056,32 @@ impl<'a> From<&'a str> for Text<'a> {
20562056
}
20572057
}
20582058

2059+
/// Undecoded content of an XML text node, kept as raw bytes.
2060+
#[derive(Clone, Debug, PartialEq, Eq)]
2061+
pub struct Binary<'a> {
2062+
/// The raw bytes, either borrowed or owned.
2063+
pub text: Cow<'a, [u8]>,
2064+
}
2065+
2066+
2067+
impl<'a> Deref for Binary<'a> {
2068+
type Target = [u8];
2069+
2070+
#[inline]
2071+
fn deref(&self) -> &Self::Target {
2072+
self.text.deref()
2073+
}
2074+
}
2075+
2076+
impl<'a> From<&'a [u8]> for Binary<'a> {
2077+
#[inline]
2078+
fn from(text: &'a [u8]) -> Self {
2079+
Self {
2080+
text: Cow::Borrowed(text),
2081+
}
2082+
}
2083+
}
2084+
20592085
////////////////////////////////////////////////////////////////////////////////////////////////////
20602086

20612087
/// Simplified event which contains only those variants that are used by the deserializer
@@ -2074,6 +2100,8 @@ pub enum DeEvent<'a> {
20742100
/// [`Comment`]: Event::Comment
20752101
/// [`PI`]: Event::PI
20762102
Text(Text<'a>),
2103+
/// Text content that could not be unescaped and is kept as raw, undecoded bytes.
2104+
Binary(Binary<'a>),
20772105
/// End of XML document.
20782106
Eof,
20792107
}
@@ -2217,7 +2245,11 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22172245
// FIXME: Actually, we should trim after decoding text, but now we trim before
22182246
continue;
22192247
}
2220-
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2248+
match e.unescape_with(|entity| self.entity_resolver.resolve(entity)).map(|res| self.drain_text(res)) {
2249+
Ok(x) => x,
2250+
// Failed to unescape: treat the raw content as a binary blob.
2251+
Err(_) => Ok(DeEvent::Binary(Binary { text: e.into_inner() })),
2252+
}
22212253
}
22222254
PayloadEvent::CData(e) => self.drain_text(e.decode()?),
22232255
PayloadEvent::DocType(e) => {
@@ -2687,6 +2719,8 @@ where
26872719
fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
26882720
match self.next()? {
26892721
DeEvent::Text(e) => Ok(e.text),
2722+
// SAFETY: Binary event should never be emitted for decoded strings.
2723+
DeEvent::Binary(e) => unreachable!("{:?}", e),
26902724
// allow one nested level
26912725
DeEvent::Start(e) if allow_start => self.read_text(e.name()),
26922726
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
@@ -2708,10 +2742,12 @@ where
27082742
// The matching tag name is guaranteed by the reader
27092743
DeEvent::End(_) => Ok(e.text),
27102744
// SAFETY: Cannot be two consecutive Text events, they would be merged into one
2711-
DeEvent::Text(_) => unreachable!(),
2745+
DeEvent::Text(_) | DeEvent::Binary(_) => unreachable!(),
27122746
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
27132747
DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
27142748
},
2749+
// SAFETY: Binary event should never be emitted for decoded strings.
2750+
DeEvent::Binary(e) => unreachable!("{:?}", e),
27152751
// We can get End event in case of `<tag></tag>` or `<tag/>` input
27162752
// Return empty text in that case
27172753
// The matching tag name is guaranteed by the reader
@@ -2827,6 +2863,30 @@ where
28272863
}
28282864
}
28292865

2866+
impl<'de, R> Deserializer<'de, IoReader<R>>
2867+
where
2868+
R: BufRead,
2869+
{
2870+
/// Create new deserializer that will copy data from the specified reader
2871+
/// into internal buffer.
2872+
///
2873+
/// If you already have a string use [`Self::from_str`] instead, because it
2874+
/// will borrow instead of copy. If you have `&[u8]` which is known to represent
2875+
/// UTF-8, you can decode it first before using [`from_str`].
2876+
///
2877+
/// Deserializer created with this method will not resolve custom entities.
2878+
pub fn from_custom_reader(reader: Reader<R>) -> Self {
2879+
Self::new(
2880+
IoReader {
2881+
reader,
2882+
start_trimmer: StartTrimmer::default(),
2883+
buf: Vec::new(),
2884+
},
2885+
PredefinedEntityResolver
2886+
)
2887+
}
2888+
}
2889+
28302890
impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
28312891
where
28322892
R: BufRead,
@@ -2884,6 +2944,10 @@ where
28842944
Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
28852945
Cow::Owned(s) => visitor.visit_string(s),
28862946
},
2947+
DeEvent::Binary(e) => match e.text {
2948+
Cow::Borrowed(s) => visitor.visit_borrowed_bytes(s),
2949+
Cow::Owned(s) => visitor.visit_byte_buf(s),
2950+
},
28872951
DeEvent::Eof => Err(DeError::UnexpectedEof),
28882952
}
28892953
}
@@ -2914,7 +2978,7 @@ where
29142978
self.read_to_end(s.name())?;
29152979
visitor.visit_unit()
29162980
}
2917-
DeEvent::Text(_) => visitor.visit_unit(),
2981+
DeEvent::Text(_) | DeEvent::Binary(_) => visitor.visit_unit(),
29182982
// SAFETY: The reader is guaranteed that we don't have unmatched tags
29192983
// If we are here, then our deserializer has a bug
29202984
DeEvent::End(e) => unreachable!("{:?}", e),

src/de/var.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ where
4646
seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?,
4747
false,
4848
),
49-
DeEvent::Text(_) => (
49+
DeEvent::Text(_) | DeEvent::Binary(_) => (
5050
seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
5151
true,
5252
),

src/errors.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,4 +464,10 @@ pub mod serialize {
464464
Self::Custom(e.to_string())
465465
}
466466
}
467+
impl From<std::io::Error> for DeError {
468+
#[inline]
469+
fn from(e: std::io::Error) -> Self {
470+
Self::Custom(e.to_string())
471+
}
472+
}
467473
}

0 commit comments

Comments
 (0)