diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5fb9aaa3..b127938c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: include: # Test MSRV - - rust: 1.62.0 # keep in sync with manifest rust-version + - rust: 1.65.0 # keep in sync with manifest rust-version TARGET: x86_64-unknown-linux-gnu # Test nightly but don't fail diff --git a/CHANGELOG.md b/CHANGELOG.md index f0fa068f..19f6e6b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +### Breaking +- MSRV is now `1.65.0`. + ### Added - Support for optional package `defmt` which allows for easy conversion for error types when using tools like `probe-rs` for logging over debuggers. - Implement `Serializer::collect_str` - Derive `Serialize` for `de::Error` and `ser::Error` +- Support for deserializing escaped strings. ### Changed diff --git a/Cargo.toml b/Cargo.toml index 5d924c1e..30f6740a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ categories = ["no-std"] description = "serde-json for no_std programs" documentation = "https://docs.rs/serde-json-core" edition = "2018" -rust-version = "1.62.0" # keep in sync with ci, src/lib.rs, and README +rust-version = "1.65.0" # keep in sync with ci, src/lib.rs, and README keywords = ["serde", "json"] license = "MIT OR Apache-2.0" name = "serde-json-core" diff --git a/README.md b/README.md index e63ad91c..f4c3c2b3 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ This project is developed and maintained by the [rust-embedded-community]. ## Minimum Supported Rust Version (MSRV) -This crate is guaranteed to compile on stable Rust 1.62.0 and up. It *might* +This crate is guaranteed to compile on stable Rust 1.65.0 and up. It *might* compile with older versions but that may change in any new patch release. 
## License diff --git a/src/de/enum_.rs b/src/de/enum_.rs index 9012873a..2c3c6eb6 100644 --- a/src/de/enum_.rs +++ b/src/de/enum_.rs @@ -2,17 +2,17 @@ use serde::de; use crate::de::{Deserializer, Error, Result}; -pub(crate) struct UnitVariantAccess<'a, 'b> { - de: &'a mut Deserializer<'b>, +pub(crate) struct UnitVariantAccess<'a, 'b, 's> { + de: &'a mut Deserializer<'b, 's>, } -impl<'a, 'b> UnitVariantAccess<'a, 'b> { - pub(crate) fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> UnitVariantAccess<'a, 'b, 's> { + pub(crate) fn new(de: &'a mut Deserializer<'b, 's>) -> Self { UnitVariantAccess { de } } } -impl<'a, 'de> de::EnumAccess<'de> for UnitVariantAccess<'a, 'de> { +impl<'a, 'de, 's> de::EnumAccess<'de> for UnitVariantAccess<'a, 'de, 's> { type Error = Error; type Variant = Self; @@ -25,7 +25,7 @@ impl<'a, 'de> de::EnumAccess<'de> for UnitVariantAccess<'a, 'de> { } } -impl<'de, 'a> de::VariantAccess<'de> for UnitVariantAccess<'a, 'de> { +impl<'de, 'a, 's> de::VariantAccess<'de> for UnitVariantAccess<'a, 'de, 's> { type Error = Error; fn unit_variant(self) -> Result<()> { @@ -54,17 +54,17 @@ impl<'de, 'a> de::VariantAccess<'de> for UnitVariantAccess<'a, 'de> { } } -pub(crate) struct VariantAccess<'a, 'b> { - de: &'a mut Deserializer<'b>, +pub(crate) struct VariantAccess<'a, 'b, 's> { + de: &'a mut Deserializer<'b, 's>, } -impl<'a, 'b> VariantAccess<'a, 'b> { - pub(crate) fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> VariantAccess<'a, 'b, 's> { + pub(crate) fn new(de: &'a mut Deserializer<'b, 's>) -> Self { VariantAccess { de } } } -impl<'a, 'de> de::EnumAccess<'de> for VariantAccess<'a, 'de> { +impl<'a, 'de, 's> de::EnumAccess<'de> for VariantAccess<'a, 'de, 's> { type Error = Error; type Variant = Self; @@ -78,7 +78,7 @@ impl<'a, 'de> de::EnumAccess<'de> for VariantAccess<'a, 'de> { } } -impl<'de, 'a> de::VariantAccess<'de> for VariantAccess<'a, 'de> { +impl<'de, 'a, 's> de::VariantAccess<'de> for VariantAccess<'a, 'de, 's> { 
type Error = Error; fn unit_variant(self) -> Result<()> { diff --git a/src/de/map.rs b/src/de/map.rs index 7da47d3f..c38c81d8 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -2,18 +2,18 @@ use serde::de::{self, Visitor}; use crate::de::{Deserializer, Error}; -pub struct MapAccess<'a, 'b> { - de: &'a mut Deserializer<'b>, +pub struct MapAccess<'a, 'b, 's> { + de: &'a mut Deserializer<'b, 's>, first: bool, } -impl<'a, 'b> MapAccess<'a, 'b> { - pub(crate) fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> MapAccess<'a, 'b, 's> { + pub(crate) fn new(de: &'a mut Deserializer<'b, 's>) -> Self { MapAccess { de, first: true } } } -impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { +impl<'a, 'de, 's> de::MapAccess<'de> for MapAccess<'a, 'de, 's> { type Error = Error; fn next_key_seed(&mut self, seed: K) -> Result, Error> @@ -57,11 +57,11 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { } } -struct MapKey<'a, 'b> { - de: &'a mut Deserializer<'b>, +struct MapKey<'a, 'b, 's> { + de: &'a mut Deserializer<'b, 's>, } -impl<'de, 'a> de::Deserializer<'de> for MapKey<'a, 'de> { +impl<'de, 'a, 's> de::Deserializer<'de> for MapKey<'a, 'de, 's> { type Error = Error; fn deserialize_any(self, _visitor: V) -> Result diff --git a/src/de/mod.rs b/src/de/mod.rs index d1c5048a..88e8fc2f 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -23,6 +23,12 @@ pub type Result = core::result::Result; #[cfg_attr(feature = "defmt", derive(defmt::Format))] #[non_exhaustive] pub enum Error { + /// Can’t parse a value without knowing its expected type. + AnyIsUnsupported, + + /// Cannot parse a sequence of bytes. + BytesIsUnsupported, + /// EOF while parsing a list. EofWhileParsingList, @@ -62,6 +68,12 @@ pub enum Error { /// Invalid unicode code point. InvalidUnicodeCodePoint, + /// Invalid String Escape Sequence + InvalidEscapeSequence, + + /// Escaped String length exceeds buffer size + EscapedStringIsTooLong, + /// Object key is not a string. 
KeyMustBeAString, @@ -83,16 +95,33 @@ pub enum Error { impl serde::de::StdError for Error {} +impl From for Error { + fn from(error: crate::str::StringUnescapeError) -> Self { + match error { + crate::str::StringUnescapeError::InvalidEscapeSequence => Self::InvalidEscapeSequence, + } + } +} + /// A structure that deserializes Rust values from JSON in a buffer. -pub struct Deserializer<'b> { +pub struct Deserializer<'b, 's> { slice: &'b [u8], index: usize, + string_unescape_buffer: Option<&'s mut [u8]>, } -impl<'a> Deserializer<'a> { - /// Create a new `Deserializer` - pub fn new(slice: &'a [u8]) -> Deserializer<'_> { - Deserializer { slice, index: 0 } +impl<'a, 's> Deserializer<'a, 's> { + /// Create a new `Deserializer`, optionally with a buffer to use to unescape strings. + /// If not present, strings are not unescaped. + pub fn new( + slice: &'a [u8], + string_unescape_buffer: Option<&'s mut [u8]>, + ) -> Deserializer<'a, 's> { + Deserializer { + slice, + index: 0, + string_unescape_buffer, + } } fn eat_char(&mut self) { @@ -172,7 +201,14 @@ impl<'a> Deserializer<'a> { } } + /// Parse a string, returning the escaped string. fn parse_str(&mut self) -> Result<&'a str> { + if self.parse_whitespace().ok_or(Error::EofWhileParsingValue)? == b'"' { + self.eat_char(); + } else { + return Err(Error::InvalidType); + } + let start = self.index; loop { match self.peek() { @@ -205,6 +241,7 @@ impl<'a> Deserializer<'a> { } else { let end = self.index; self.eat_char(); + return str::from_utf8(&self.slice[start..end]) .map_err(|_| Error::InvalidUnicodeCodePoint); } @@ -344,7 +381,7 @@ macro_rules! deserialize_fromstr { }}; } -impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { +impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { type Error = Error; /// Unsupported. Can’t parse a value without knowing its expected type. 
@@ -352,7 +389,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { - unreachable!() + Err(Error::AnyIsUnsupported) } fn deserialize_bool(self, visitor: V) -> Result @@ -446,34 +483,61 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { deserialize_fromstr!(self, visitor, f64, visit_f64, b"0123456789+-.eE") } - fn deserialize_char(self, _visitor: V) -> Result + fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { - unreachable!() + self.deserialize_str(visitor) } fn deserialize_str(self, visitor: V) -> Result where V: Visitor<'de>, { - let peek = self.parse_whitespace().ok_or(Error::EofWhileParsingValue)?; + let escaped_string = self.parse_str()?; - match peek { - b'"' => { - self.eat_char(); - visitor.visit_borrowed_str(self.parse_str()?) - } - _ => Err(Error::InvalidType), + // If the unescape buffer is not provided, skip unescaping strings + let Some(string_unescape_buffer) = self.string_unescape_buffer.as_deref_mut() else { + return visitor.visit_borrowed_str(escaped_string); + }; + + // If the escaped string doesn't contain '\\', it can't have any escaped characters + if !escaped_string.as_bytes().contains(&b'\\') { + return visitor.visit_borrowed_str(escaped_string); } + + let mut string_unescape_buffer_write_position = 0; + + for fragment in crate::str::EscapedStr(escaped_string).fragments() { + let char_encode_buffer = &mut [0; 4]; + + let unescaped_bytes = match fragment? { + crate::str::EscapedStringFragment::NotEscaped(fragment) => fragment.as_bytes(), + crate::str::EscapedStringFragment::Escaped(c) => { + c.encode_utf8(char_encode_buffer).as_bytes() + } + }; + + string_unescape_buffer[string_unescape_buffer_write_position..] + .get_mut(..unescaped_bytes.len()) + .ok_or(Error::EscapedStringIsTooLong)? 
+ .copy_from_slice(unescaped_bytes); + + string_unescape_buffer_write_position += unescaped_bytes.len(); + } + + visitor.visit_str( + str::from_utf8(&string_unescape_buffer[..string_unescape_buffer_write_position]) + .map_err(|_| Error::InvalidUnicodeCodePoint)?, + ) } /// Unsupported. String is not available in no-std. - fn deserialize_string(self, _visitor: V) -> Result + fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { - unreachable!() + self.deserialize_str(visitor) } /// Unsupported @@ -481,7 +545,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { - unreachable!() + Err(Error::BytesIsUnsupported) } /// Unsupported @@ -489,7 +553,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { - unreachable!() + Err(Error::BytesIsUnsupported) } fn deserialize_option(self, visitor: V) -> Result @@ -534,12 +598,41 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { self.deserialize_unit(visitor) } - /// Unsupported. We can’t parse newtypes because we don’t know the underlying type. - fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + fn deserialize_newtype_struct(self, name: &'static str, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_newtype_struct(self) + // If the newtype struct is an `EscapedStr`... + if name == crate::str::EscapedStr::NAME { + // ...deserialize as an escaped string instead. + + struct EscapedStringDeserializer<'a, 'de, 's>(&'a mut Deserializer<'de, 's>); + + impl<'a, 'de, 's> serde::Deserializer<'de> for EscapedStringDeserializer<'a, 'de, 's> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + // The only structure which is deserialized at this point is an `EscapedStr`, + // so pass the escaped string to its implementation of visit_borrowed_str. + // This line de facto becomes `Ok(EscapedStr(self.0.parse_str()?))`. 
+ visitor.visit_borrowed_str(self.0.parse_str()?) + } + + // `EscapedStr` only deserializes strings, so we might as well forward all methods to `deserialize_any`. + serde::forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier ignored_any + } + } + + visitor.visit_newtype_struct(EscapedStringDeserializer(self)) + } else { + visitor.visit_newtype_struct(self) + } } fn deserialize_seq(self, visitor: V) -> Result @@ -735,19 +828,49 @@ impl fmt::Display for Error { } } -/// Deserializes an instance of type `T` from bytes of JSON text -/// Returns the value and the number of bytes consumed in the process -pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)> +fn from_slice_maybe_escaped<'a, T>( + v: &'a [u8], + string_unescape_buffer: Option<&mut [u8]>, +) -> Result<(T, usize)> where T: de::Deserialize<'a>, { - let mut de = Deserializer::new(v); + let mut de = Deserializer::new(v, string_unescape_buffer); let value = de::Deserialize::deserialize(&mut de)?; let length = de.end()?; Ok((value, length)) } +/// Deserializes an instance of type `T` from bytes of JSON text, using the provided buffer to unescape strings +/// Returns the value and the number of bytes consumed in the process +pub fn from_slice_escaped<'a, T>( + v: &'a [u8], + string_unescape_buffer: &mut [u8], +) -> Result<(T, usize)> +where + T: de::Deserialize<'a>, +{ + from_slice_maybe_escaped(v, Some(string_unescape_buffer)) +} + +/// Deserializes an instance of type `T` from bytes of JSON text +/// Returns the value and the number of bytes consumed in the process +pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)> +where + T: de::Deserialize<'a>, +{ + from_slice_maybe_escaped(v, None) +} + +/// Deserializes an instance of type T from a string of JSON text, using the provided buffer to unescape strings +pub fn from_str_escaped<'a, T>(s: &'a 
str, string_unescape_buffer: &mut [u8]) -> Result<(T, usize)> +where + T: de::Deserialize<'a>, +{ + from_slice_escaped(s.as_bytes(), string_unescape_buffer) +} + /// Deserializes an instance of type T from a string of JSON text pub fn from_str<'a, T>(s: &'a str) -> Result<(T, usize)> where @@ -758,7 +881,6 @@ where #[cfg(test)] mod tests { - use core::str::FromStr; use serde_derive::Deserialize; #[derive(Debug, Deserialize, PartialEq)] @@ -820,49 +942,118 @@ mod tests { assert_eq!(crate::from_str(r#" "thing" "#), Ok((Type::Thing, 9))); } + #[test] + fn char() { + fn from_str_test<'de, T: serde::Deserialize<'de>>( + s: &'de str, + ) -> super::Result<(T, usize)> { + crate::from_str_escaped(s, &mut [0; 8]) + } + + assert_eq!(from_str_test(r#""n""#), Ok(('n', 3))); + assert_eq!(from_str_test(r#""\"""#), Ok(('"', 4))); + assert_eq!(from_str_test(r#""\\""#), Ok(('\\', 4))); + assert_eq!(from_str_test(r#""/""#), Ok(('/', 3))); + assert_eq!(from_str_test(r#""\b""#), Ok(('\x08', 4))); + assert_eq!(from_str_test(r#""\f""#), Ok(('\x0C', 4))); + assert_eq!(from_str_test(r#""\n""#), Ok(('\n', 4))); + assert_eq!(from_str_test(r#""\r""#), Ok(('\r', 4))); + assert_eq!(from_str_test(r#""\t""#), Ok(('\t', 4))); + assert_eq!(from_str_test(r#""\u000b""#), Ok(('\x0B', 8))); + assert_eq!(from_str_test(r#""\u000B""#), Ok(('\x0B', 8))); + assert_eq!(from_str_test(r#""Σ""#), Ok(('Σ', 4))); + } + #[test] fn str() { + // No escaping, so can borrow from the input assert_eq!(crate::from_str(r#" "hello" "#), Ok(("hello", 9))); assert_eq!(crate::from_str(r#" "" "#), Ok(("", 4))); assert_eq!(crate::from_str(r#" " " "#), Ok((" ", 5))); assert_eq!(crate::from_str(r#" "👏" "#), Ok(("👏", 8))); - // no unescaping is done (as documented as a known issue in lib.rs) - assert_eq!(crate::from_str(r#" "hel\tlo" "#), Ok(("hel\\tlo", 11))); - assert_eq!(crate::from_str(r#" "hello \\" "#), Ok(("hello \\\\", 12))); + fn s(s: &'static str) -> heapless::String<1024> { + s.parse().expect("Failed to create test 
string") + } + + fn from_str_test<'de, T: serde::Deserialize<'de>>( + s: &'de str, + ) -> super::Result<(T, usize)> { + crate::from_str_escaped(s, &mut [0; 16]) + } // escaped " in the string content - assert_eq!(crate::from_str(r#" "foo\"bar" "#), Ok((r#"foo\"bar"#, 12))); + assert_eq!(from_str_test(r#" "foo\"bar" "#), Ok((s(r#"foo"bar"#), 12))); assert_eq!( - crate::from_str(r#" "foo\\\"bar" "#), - Ok((r#"foo\\\"bar"#, 14)) + from_str_test(r#" "foo\\\"bar" "#), + Ok((s(r#"foo\"bar"#), 14)) ); assert_eq!( - crate::from_str(r#" "foo\"\"bar" "#), - Ok((r#"foo\"\"bar"#, 14)) + from_str_test(r#" "foo\"\"bar" "#), + Ok((s(r#"foo""bar"#), 14)) ); - assert_eq!(crate::from_str(r#" "\"bar" "#), Ok((r#"\"bar"#, 9))); - assert_eq!(crate::from_str(r#" "foo\"" "#), Ok((r#"foo\""#, 9))); - assert_eq!(crate::from_str(r#" "\"" "#), Ok((r#"\""#, 6))); + assert_eq!(from_str_test(r#" "\"bar" "#), Ok((s(r#""bar"#), 9))); + assert_eq!(from_str_test(r#" "foo\"" "#), Ok((s(r#"foo""#), 9))); + assert_eq!(from_str_test(r#" "\"" "#), Ok((s(r#"""#), 6))); // non-excaped " preceded by backslashes assert_eq!( - crate::from_str(r#" "foo bar\\" "#), - Ok((r#"foo bar\\"#, 13)) + from_str_test(r#" "foo bar\\" "#), + Ok((s(r#"foo bar\"#), 13)) + ); + assert_eq!( + from_str_test(r#" "foo bar\\\\" "#), + Ok((s(r#"foo bar\\"#), 15)) ); assert_eq!( - crate::from_str(r#" "foo bar\\\\" "#), - Ok((r#"foo bar\\\\"#, 15)) + from_str_test(r#" "foo bar\\\\\\" "#), + Ok((s(r#"foo bar\\\"#), 17)) ); assert_eq!( - crate::from_str(r#" "foo bar\\\\\\" "#), - Ok((r#"foo bar\\\\\\"#, 17)) + from_str_test(r#" "foo bar\\\\\\\\" "#), + Ok((s(r#"foo bar\\\\"#), 19)) + ); + assert_eq!(from_str_test(r#" "\\" "#), Ok((s(r#"\"#), 6))); + } + + #[test] + fn tuple_of_str() { + fn s(s: &'static str) -> heapless::String<1024> { + s.parse().expect("Failed to create test string") + } + + fn from_str_test<'de, T: serde::Deserialize<'de>>( + s: &'de str, + ) -> super::Result<(T, usize)> { + crate::from_str_escaped(s, &mut [0; 16]) 
+ } + + // The combined length of the first and third strings is longer than the buffer, but that's OK, + // as escaped strings are deserialized into owned str types, e.g. `heapless::String`. + // The second string is longer than the buffer, but that's OK, as strings which aren't escaped + // are deserialized as str's borrowed from the input + + assert_eq!( + from_str_test( + r#" [ "AAAAAAAAAAAA\n", "BBBBBBBBBBBBBBBBBBBBBBBB", "CCCCCCCCCCCC\n" ] "# + ), + Ok(( + ( + s("AAAAAAAAAAAA\n"), + "BBBBBBBBBBBBBBBBBBBBBBBB", + s("CCCCCCCCCCCC\n") + ), + 68 + )) ); + } + + #[test] + fn escaped_str() { assert_eq!( - crate::from_str(r#" "foo bar\\\\\\\\" "#), - Ok((r#"foo bar\\\\\\\\"#, 19)) + crate::from_str(r#""Hello\nWorld""#), + Ok((crate::str::EscapedStr(r#"Hello\nWorld"#), 14)) ); - assert_eq!(crate::from_str(r#" "\\" "#), Ok((r#"\\"#, 6))); } #[test] @@ -1085,10 +1276,9 @@ mod tests { assert_eq!( crate::from_str::(r#"[10]"#), Err(crate::de::Error::CustomErrorWithMessage( - heapless::String::from_str( - "invalid length 1, expected tuple struct Xy with 2 elements" - ) - .unwrap() + "invalid length 1, expected tuple struct Xy with 2 elements" + .parse() + .unwrap() )) ); assert_eq!( @@ -1195,9 +1385,7 @@ mod tests { use serde::de::Error; assert_eq!( crate::de::Error::custom("something bad happened"), - crate::de::Error::CustomErrorWithMessage( - heapless::String::from_str("something bad happened").unwrap() - ) + crate::de::Error::CustomErrorWithMessage("something bad happened".parse().unwrap()) ); } @@ -1207,8 +1395,8 @@ mod tests { use serde::de::Error; assert_eq!( crate::de::Error::custom("0123456789012345678901234567890123456789012345678901234567890123 <- after here the message should be truncated"), - crate::de::Error::CustomErrorWithMessage(heapless::String::from_str( - "0123456789012345678901234567890123456789012345678901234567890123").unwrap() + crate::de::Error::CustomErrorWithMessage( 
"0123456789012345678901234567890123456789012345678901234567890123".parse().unwrap() ) ); } diff --git a/src/de/seq.rs b/src/de/seq.rs index f3dbbdca..1470a5af 100644 --- a/src/de/seq.rs +++ b/src/de/seq.rs @@ -2,18 +2,18 @@ use serde::de; use crate::de::{Deserializer, Error, Result}; -pub(crate) struct SeqAccess<'a, 'b> { +pub(crate) struct SeqAccess<'a, 'b, 's> { first: bool, - de: &'a mut Deserializer<'b>, + de: &'a mut Deserializer<'b, 's>, } -impl<'a, 'b> SeqAccess<'a, 'b> { - pub fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> SeqAccess<'a, 'b, 's> { + pub fn new(de: &'a mut Deserializer<'b, 's>) -> Self { SeqAccess { de, first: true } } } -impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { +impl<'a, 'de, 's> de::SeqAccess<'de> for SeqAccess<'a, 'de, 's> { type Error = Error; fn next_element_seed(&mut self, seed: T) -> Result> diff --git a/src/lib.rs b/src/lib.rs index 7f2de3a3..c40dbb44 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,7 +52,7 @@ //! //! # Minimum Supported Rust Version (MSRV) //! -//! This crate is guaranteed to compile on stable Rust 1.62.0 and up. It *might* compile with older +//! This crate is guaranteed to compile on stable Rust 1.65.0 and up. It *might* compile with older //! versions but that may change in any new patch release. 
#![deny(missing_docs)] @@ -63,9 +63,10 @@ pub mod de; pub mod ser; +pub mod str; #[doc(inline)] -pub use self::de::{from_slice, from_str}; +pub use self::de::{from_slice, from_slice_escaped, from_str, from_str_escaped}; #[doc(inline)] pub use self::ser::to_slice; #[cfg(feature = "heapless")] diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 977509d7..74db6e09 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -363,11 +363,190 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> { self.serialize_str(variant) } - fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result where T: ser::Serialize + ?Sized, { - value.serialize(self) + // If the newtype struct is an `EscapedStr`... + if name == crate::str::EscapedStr::NAME { + // serialize it as an already escaped string. + + struct EscapedStringSerializer<'a, 'b>(&'a mut Serializer<'b>); + + impl<'a, 'b: 'a> serde::Serializer for EscapedStringSerializer<'a, 'b> { + type Ok = (); + type Error = Error; + + type SerializeSeq = serde::ser::Impossible<(), Error>; + type SerializeTuple = serde::ser::Impossible<(), Error>; + type SerializeTupleStruct = serde::ser::Impossible<(), Error>; + type SerializeTupleVariant = serde::ser::Impossible<(), Error>; + type SerializeMap = serde::ser::Impossible<(), Error>; + type SerializeStruct = serde::ser::Impossible<(), Error>; + type SerializeStructVariant = serde::ser::Impossible<(), Error>; + + fn serialize_bool(self, _v: bool) -> Result { + unreachable!() + } + + fn serialize_i8(self, _v: i8) -> Result { + unreachable!() + } + + fn serialize_i16(self, _v: i16) -> Result { + unreachable!() + } + + fn serialize_i32(self, _v: i32) -> Result { + unreachable!() + } + + fn serialize_i64(self, _v: i64) -> Result { + unreachable!() + } + + fn serialize_u8(self, _v: u8) -> Result { + unreachable!() + } + + fn serialize_u16(self, _v: u16) -> Result { + unreachable!() + } + + fn 
serialize_u32(self, _v: u32) -> Result { + unreachable!() + } + + fn serialize_u64(self, _v: u64) -> Result { + unreachable!() + } + + fn serialize_f32(self, _v: f32) -> Result { + unreachable!() + } + + fn serialize_f64(self, _v: f64) -> Result { + unreachable!() + } + + fn serialize_char(self, _v: char) -> Result { + unreachable!() + } + + fn serialize_str(self, v: &str) -> Result { + v.bytes().try_for_each(|c| self.0.push(c)) + } + + fn serialize_bytes(self, _v: &[u8]) -> Result { + unreachable!() + } + + fn serialize_none(self) -> Result { + unreachable!() + } + + fn serialize_some(self, _value: &T) -> Result { + unreachable!() + } + + fn serialize_unit(self) -> Result { + unreachable!() + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + unreachable!() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + unreachable!() + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + _value: &T, + ) -> Result { + unreachable!() + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result { + unreachable!() + } + + fn serialize_seq(self, _len: Option) -> Result { + unreachable!() + } + + fn serialize_tuple(self, _len: usize) -> Result { + unreachable!() + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_map(self, _len: Option) -> Result { + unreachable!() + } + + fn serialize_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + 
fn collect_str(self, _value: &T) -> Result {
+                    unreachable!()
+                }
+            }
+
+            self.push(b'"')?;
+
+            value.serialize(EscapedStringSerializer(self))?;
+
+            self.push(b'"')?;
+
+            Ok(())
+        } else {
+            value.serialize(self)
+        }
     }
 
     fn serialize_newtype_variant(
@@ -647,11 +826,19 @@ mod tests {
             r#"" \u001D ""#
         );
         assert_eq!(
-            &*crate::to_string::<_, N>(" \u{001f} ").unwrap(),
+            crate::to_string::<_, N>(" \u{001f} ").unwrap(),
             r#"" \u001F ""#
         );
     }
 
+    #[test]
+    fn escaped_str() {
+        assert_eq!(
+            crate::to_string::<_, N>(&crate::str::EscapedStr(r#"Hello\\nWorld"#)).unwrap(),
+            r#""Hello\\nWorld""#
+        );
+    }
+
     #[test]
     fn struct_bool() {
         #[derive(Serialize)]
diff --git a/src/str.rs b/src/str.rs
new file mode 100644
index 00000000..ab122485
--- /dev/null
+++ b/src/str.rs
@@ -0,0 +1,191 @@
+//! Utilities for serializing and deserializing strings.
+
+use core::fmt;
+
+#[derive(Debug)]
+/// A fragment of an escaped string
+pub enum EscapedStringFragment<'a> {
+    /// A series of characters which weren't escaped in the input.
+    NotEscaped(&'a str),
+    /// A character which was escaped in the input.
+    Escaped(char),
+}
+
+#[derive(Debug)]
+/// Errors occurring while unescaping strings.
+pub enum StringUnescapeError {
+    /// Failed to unescape a character due to an invalid escape sequence.
+    InvalidEscapeSequence,
+}
+
+impl fmt::Display for StringUnescapeError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            StringUnescapeError::InvalidEscapeSequence => write!(
+                f,
+                "Failed to unescape a character due to an invalid escape sequence."
+            ),
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for StringUnescapeError {}
+
+/// Splits exactly four ASCII hexadecimal digits off the front of `s`.
+///
+/// Returns the decoded UTF-16 code unit and the rest of `s`, or `None` if `s` does not start
+/// with four hex digits.
+fn parse_hex4(s: &str) -> Option<(u32, &str)> {
+    let digits = s.get(..4)?;
+
+    // `u32::from_str_radix` also accepts a leading `+`, which JSON does not allow here.
+    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
+        return None;
+    }
+
+    Some((u32::from_str_radix(digits, 16).ok()?, &s[4..]))
+}
+
+/// Splits the next fragment off the front of `escaped_string`.
+///
+/// If the input starts with a backslash, one escape sequence is decoded and returned as
+/// `Escaped`; a `\uXXXX` high surrogate must be followed by a `\uXXXX` low surrogate, and the
+/// pair is decoded into a single character. Otherwise everything up to the next backslash (or
+/// the end of the input) is returned unchanged as `NotEscaped`.
+///
+/// Returns the fragment and the remaining input, or `InvalidEscapeSequence` if the escape
+/// sequence is unknown, truncated, not hexadecimal, or an unpaired surrogate.
+fn unescape_next_fragment(
+    escaped_string: &str,
+) -> Result<(EscapedStringFragment<'_>, &str), StringUnescapeError> {
+    Ok(if let Some(rest) = escaped_string.strip_prefix('\\') {
+        let mut escaped_string_chars = rest.chars();
+
+        let unescaped_char = match escaped_string_chars.next() {
+            Some('"') => '"',
+            Some('\\') => '\\',
+            Some('/') => '/',
+            Some('b') => '\x08',
+            Some('f') => '\x0C',
+            Some('n') => '\n',
+            Some('r') => '\r',
+            Some('t') => '\t',
+            Some('u') => {
+                let (code_unit, tail) = parse_hex4(escaped_string_chars.as_str())
+                    .ok_or(StringUnescapeError::InvalidEscapeSequence)?;
+
+                let (code_point, tail) = match code_unit {
+                    // A high surrogate is only valid as the first half of a surrogate pair.
+                    0xD800..=0xDBFF => {
+                        let (low, tail) = tail
+                            .strip_prefix("\\u")
+                            .and_then(parse_hex4)
+                            .filter(|(low, _)| (0xDC00..=0xDFFF).contains(low))
+                            .ok_or(StringUnescapeError::InvalidEscapeSequence)?;
+
+                        (0x10000 + ((code_unit - 0xD800) << 10) + (low - 0xDC00), tail)
+                    }
+                    _ => (code_unit, tail),
+                };
+
+                escaped_string_chars = tail.chars();
+
+                // `char::from_u32` rejects the remaining invalid case, a lone low surrogate.
+                char::from_u32(code_point).ok_or(StringUnescapeError::InvalidEscapeSequence)?
+            }
+            _ => return Err(StringUnescapeError::InvalidEscapeSequence),
+        };
+
+        (
+            EscapedStringFragment::Escaped(unescaped_char),
+            escaped_string_chars.as_str(),
+        )
+    } else {
+        let (fragment, rest) =
+            escaped_string.split_at(escaped_string.find('\\').unwrap_or(escaped_string.len()));
+
+        (EscapedStringFragment::NotEscaped(fragment), rest)
+    })
+}
+
+/// A borrowed escaped string. `EscapedStr` can be used to borrow an escaped string from the input,
+/// even when deserialized using `from_str_escaped` or `from_slice_escaped`.
+///
+/// ```
+/// #[derive(serde::Deserialize)]
+/// struct Event<'a> {
+///     name: heapless::String<16>,
+///     #[serde(borrow)]
+///     description: serde_json_core::str::EscapedStr<'a>,
+/// }
+///
+/// serde_json_core::de::from_str_escaped::<Event<'_>>(
+///     r#"{ "name": "Party\u0021", "description": "I'm throwing a party! Hopefully the \u2600 shines!" }"#,
+///     &mut [0; 8],
+/// )
+/// .unwrap();
+/// ```
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename = "__serde_json_core_escaped_string__")]
+pub struct EscapedStr<'a>(pub &'a str);
+
+impl<'a> EscapedStr<'a> {
+    pub(crate) const NAME: &'static str = "__serde_json_core_escaped_string__";
+
+    /// Returns an iterator over the `EscapedStringFragment`s of an escaped string.
+    pub fn fragments(&self) -> EscapedStringFragmentIter<'a> {
+        EscapedStringFragmentIter(self.0)
+    }
+}
+
+/// An iterator over the `EscapedStringFragment`s of an escaped string.
+pub struct EscapedStringFragmentIter<'a>(&'a str);
+
+impl<'a> EscapedStringFragmentIter<'a> {
+    /// Views the underlying data as a subslice of the original data.
+    pub fn as_str(&self) -> EscapedStr<'a> {
+        EscapedStr(self.0)
+    }
+}
+
+impl<'a> Iterator for EscapedStringFragmentIter<'a> {
+    type Item = Result<EscapedStringFragment<'a>, StringUnescapeError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.0.is_empty() {
+            return None;
+        }
+
+        Some(unescape_next_fragment(self.0).map(|(fragment, rest)| {
+            self.0 = rest;
+
+            fragment
+        }))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{unescape_next_fragment, EscapedStringFragment};
+
+    #[test]
+    fn unicode_escape_must_be_four_hex_digits() {
+        assert!(matches!(
+            unescape_next_fragment(r"\u0041!"),
+            Ok((EscapedStringFragment::Escaped('A'), "!"))
+        ));
+        assert!(unescape_next_fragment(r"\u+041").is_err());
+        assert!(unescape_next_fragment(r"\u004").is_err());
+    }
+
+    #[test]
+    fn unicode_escape_surrogate_pair() {
+        assert!(matches!(
+            unescape_next_fragment(r"\uD83D\uDE00!"),
+            Ok((EscapedStringFragment::Escaped('\u{1F600}'), "!"))
+        ));
+        assert!(unescape_next_fragment(r"\uD83D!").is_err());
+        assert!(unescape_next_fragment(r"\uDE00!").is_err());
+    }
+}