diff --git a/library/core/src/ascii.rs b/library/core/src/ascii.rs index 5b3711b4071ab..d3c6c046e717f 100644 --- a/library/core/src/ascii.rs +++ b/library/core/src/ascii.rs @@ -9,9 +9,10 @@ #![stable(feature = "core_ascii", since = "1.26.0")] +use crate::escape::{AlwaysEscaped, EscapeIterInner}; +use crate::fmt; use crate::iter::FusedIterator; use crate::num::NonZero; -use crate::{escape, fmt}; mod ascii_char; #[unstable(feature = "ascii_char", issue = "110998")] @@ -24,7 +25,7 @@ pub use ascii_char::AsciiChar as Char; #[must_use = "iterators are lazy and do nothing unless consumed"] #[stable(feature = "rust1", since = "1.0.0")] #[derive(Clone)] -pub struct EscapeDefault(escape::EscapeIterInner<4>); +pub struct EscapeDefault(EscapeIterInner<4, AlwaysEscaped>); /// Returns an iterator that produces an escaped version of a `u8`. /// @@ -96,17 +97,12 @@ pub fn escape_default(c: u8) -> EscapeDefault { impl EscapeDefault { #[inline] pub(crate) const fn new(c: u8) -> Self { - Self(escape::EscapeIterInner::ascii(c)) + Self(EscapeIterInner::ascii(c)) } #[inline] pub(crate) fn empty() -> Self { - Self(escape::EscapeIterInner::empty()) - } - - #[inline] - pub(crate) fn as_str(&self) -> &str { - self.0.as_str() + Self(EscapeIterInner::empty()) } } @@ -168,7 +164,7 @@ impl FusedIterator for EscapeDefault {} #[stable(feature = "ascii_escape_display", since = "1.39.0")] impl fmt::Display for EscapeDefault { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.0.as_str()) + fmt::Display::fmt(&self.0, f) } } diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs index 5b9f0e2143f5d..82a3f6f916be3 100644 --- a/library/core/src/char/mod.rs +++ b/library/core/src/char/mod.rs @@ -44,7 +44,7 @@ pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-un use crate::ascii; pub(crate) use self::methods::EscapeDebugExtArgs; use crate::error::Error; -use crate::escape; +use crate::escape::{AlwaysEscaped, EscapeIterInner, MaybeEscaped}; use crate::fmt::{self, Write}; use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; use crate::num::NonZero; @@ -161,12 +161,12 @@ pub const fn from_digit(num: u32, radix: u32) -> Option { /// [`escape_unicode`]: char::escape_unicode #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] -pub struct EscapeUnicode(escape::EscapeIterInner<10>); +pub struct EscapeUnicode(EscapeIterInner<10, AlwaysEscaped>); impl EscapeUnicode { #[inline] const fn new(c: char) -> Self { - Self(escape::EscapeIterInner::unicode(c)) + Self(EscapeIterInner::unicode(c)) } } @@ -215,7 +215,7 @@ impl FusedIterator for EscapeUnicode {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeUnicode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.0.as_str()) + fmt::Display::fmt(&self.0, f) } } @@ -227,22 +227,22 @@ impl fmt::Display for EscapeUnicode { /// [`escape_default`]: char::escape_default #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] -pub struct EscapeDefault(escape::EscapeIterInner<10>); +pub struct EscapeDefault(EscapeIterInner<10, AlwaysEscaped>); impl EscapeDefault { #[inline] const fn printable(c: ascii::Char) -> Self { - Self(escape::EscapeIterInner::ascii(c.to_u8())) + Self(EscapeIterInner::ascii(c.to_u8())) } #[inline] const fn backslash(c: ascii::Char) -> Self { - Self(escape::EscapeIterInner::backslash(c)) + Self(EscapeIterInner::backslash(c)) } #[inline] const fn unicode(c: char) -> Self { - Self(escape::EscapeIterInner::unicode(c)) + Self(EscapeIterInner::unicode(c)) } } @@ -290,8 +290,9 @@ impl FusedIterator for EscapeDefault {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeDefault { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.0.as_str()) + fmt::Display::fmt(&self.0, f) } } @@ -303,37 +304,22 @@ impl fmt::Display for EscapeDefault { /// [`escape_debug`]: char::escape_debug #[stable(feature = "char_escape_debug", since = "1.20.0")] #[derive(Clone, Debug)] -pub struct EscapeDebug(EscapeDebugInner); - -#[derive(Clone, Debug)] -// Note: It’s possible to manually encode the EscapeDebugInner inside of -// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds -// a char) which would likely result in a more optimised code. For now we use -// the option easier to implement. -enum EscapeDebugInner { - Bytes(escape::EscapeIterInner<10>), - Char(char), -} +pub struct EscapeDebug(EscapeIterInner<10, MaybeEscaped>); impl EscapeDebug { #[inline] const fn printable(chr: char) -> Self { - Self(EscapeDebugInner::Char(chr)) + Self(EscapeIterInner::printable(chr)) } #[inline] const fn backslash(c: ascii::Char) -> Self { - Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c))) + Self(EscapeIterInner::backslash(c)) } #[inline] const fn unicode(c: char) -> Self { - Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c))) - } - - #[inline] - fn clear(&mut self) { - self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty()); + Self(EscapeIterInner::unicode(c)) } } @@ -343,13 +329,7 @@ impl Iterator for EscapeDebug { #[inline] fn next(&mut self) -> Option { - match self.0 { - EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from), - EscapeDebugInner::Char(chr) => { - self.clear(); - Some(chr) - } - } + self.0.next() } #[inline] @@ -367,10 +347,7 @@ impl Iterator for EscapeDebug { #[stable(feature = "char_escape_debug", since = "1.20.0")] impl ExactSizeIterator for EscapeDebug { fn len(&self) -> usize { - match &self.0 { - EscapeDebugInner::Bytes(bytes) => bytes.len(), - EscapeDebugInner::Char(_) => 1, - } + self.0.len() } } @@ -379,11 +356,9 @@ impl FusedIterator for EscapeDebug {} #[stable(feature = "char_escape_debug", since = "1.20.0")] impl fmt::Display for EscapeDebug { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.0 { - EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()), - EscapeDebugInner::Char(chr) => f.write_char(*chr), - } + fmt::Display::fmt(&self.0, f) } } @@ -480,6 +455,7 @@ macro_rules! casemappingiter_impls { #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for $ITER_NAME { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.0, f) } diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs index 0c3329f676eeb..f459c58270818 100644 --- a/library/core/src/escape.rs +++ b/library/core/src/escape.rs @@ -1,11 +1,16 @@ //! Helper code for character escaping. use crate::ascii; +use crate::fmt::{self, Write}; +use crate::marker::PhantomData; use crate::num::NonZero; use crate::ops::Range; const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap(); +/// Escapes a character with `\x` representation. +/// +/// Returns a buffer with the escaped representation and its corresponding range. #[inline] const fn backslash(a: ascii::Char) -> ([ascii::Char; N], Range) { const { assert!(N >= 2) }; @@ -18,6 +23,9 @@ const fn backslash(a: ascii::Char) -> ([ascii::Char; N], Range(byte: u8) -> ([ascii::Char; N], Range) { const { assert!(N >= 4) }; @@ -35,6 +43,7 @@ const fn hex_escape(byte: u8) -> ([ascii::Char; N], Range) { (output, 0..4) } +/// Returns a buffer with the verbatim character and its corresponding range. #[inline] const fn verbatim(a: ascii::Char) -> ([ascii::Char; N], Range) { const { assert!(N >= 1) }; @@ -48,7 +57,7 @@ const fn verbatim(a: ascii::Char) -> ([ascii::Char; N], Range(byte: u8) -> ([ascii::Char; N], Range) { const { assert!(N >= 4) }; @@ -122,9 +131,9 @@ const fn escape_ascii(byte: u8) -> ([ascii::Char; N], Range) } } -/// Escapes a character `\u{NNNN}` representation. +/// Escapes a character with `\u{NNNN}` representation. /// -/// Returns a buffer and the length of the escaped representation. +/// Returns a buffer with the escaped representation and its corresponding range. const fn escape_unicode(c: char) -> ([ascii::Char; N], Range) { const { assert!(N >= 10 && N < u8::MAX as usize) }; @@ -149,77 +158,214 @@ const fn escape_unicode(c: char) -> ([ascii::Char; N], Range (output, (start as u8)..(N as u8)) } -/// An iterator over an fixed-size array. -/// -/// This is essentially equivalent to array’s IntoIter except that indexes are -/// limited to u8 to reduce size of the structure. -#[derive(Clone, Debug)] -pub(crate) struct EscapeIterInner { - // The element type ensures this is always ASCII, and thus also valid UTF-8. - data: [ascii::Char; N], - - // Invariant: `alive.start <= alive.end <= N` +#[derive(Clone, Copy)] +union MaybeEscapedCharacter { + pub escape_seq: [ascii::Char; N], + pub literal: char, +} + +/// Marker type to indicate that the character is always escaped, +/// used to optimize the iterator implementation. +#[derive(Clone, Copy)] +#[non_exhaustive] +pub(crate) struct AlwaysEscaped; + +/// Marker type to indicate that the character may be escaped, +/// used to optimize the iterator implementation. +#[derive(Clone, Copy)] +#[non_exhaustive] +pub(crate) struct MaybeEscaped; + +/// An iterator over a possibly escaped character. +#[derive(Clone)] +pub(crate) struct EscapeIterInner { + // Invariant: + // + // If `alive.end <= Self::LITERAL_ESCAPE_START`, `data` must contain + // printable ASCII characters in the `alive` range of its `escape_seq` variant. + // + // If `alive.end > Self::LITERAL_ESCAPE_START`, `data` must contain a + // `char` in its `literal` variant, and the `alive` range must have a + // length of at most `1`. + data: MaybeEscapedCharacter, alive: Range, + escaping: PhantomData, } -impl EscapeIterInner { +impl EscapeIterInner { + const LITERAL_ESCAPE_START: u8 = 128; + + /// # Safety + /// + /// `data.escape_seq` must contain an escape sequence in the range given by `alive`. + #[inline] + const unsafe fn new(data: MaybeEscapedCharacter, alive: Range) -> Self { + // Longer escape sequences are not useful given `alive.end` is at most + // `Self::LITERAL_ESCAPE_START`. + const { assert!(N < Self::LITERAL_ESCAPE_START as usize) }; + + // Check bounds, which implicitly also checks the invariant + // `alive.end <= Self::LITERAL_ESCAPE_START`. + debug_assert!(alive.end <= (N + 1) as u8); + + Self { data, alive, escaping: PhantomData } + } + pub(crate) const fn backslash(c: ascii::Char) -> Self { - let (data, range) = backslash(c); - Self { data, alive: range } + let (escape_seq, alive) = backslash(c); + // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) } } pub(crate) const fn ascii(c: u8) -> Self { - let (data, range) = escape_ascii(c); - Self { data, alive: range } + let (escape_seq, alive) = escape_ascii(c); + // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) } } pub(crate) const fn unicode(c: char) -> Self { - let (data, range) = escape_unicode(c); - Self { data, alive: range } + let (escape_seq, alive) = escape_unicode(c); + // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) } } #[inline] pub(crate) const fn empty() -> Self { - Self { data: [ascii::Char::Null; N], alive: 0..0 } + // SAFETY: `0..0` ensures an empty escape sequence. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq: [ascii::Char::Null; N] }, 0..0) } } #[inline] - pub(crate) fn as_ascii(&self) -> &[ascii::Char] { - // SAFETY: `self.alive` is guaranteed to be a valid range for indexing `self.data`. - unsafe { - self.data.get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end)) - } + pub(crate) fn len(&self) -> usize { + usize::from(self.alive.end - self.alive.start) } #[inline] - pub(crate) fn as_str(&self) -> &str { - self.as_ascii().as_str() + pub(crate) fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { + self.alive.advance_by(n) } #[inline] - pub(crate) fn len(&self) -> usize { - usize::from(self.alive.end - self.alive.start) + pub(crate) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero> { + self.alive.advance_back_by(n) + } + + /// Returns a `char` if `self.data` contains one in its `literal` variant. + #[inline] + const fn to_char(&self) -> Option { + if self.alive.end > Self::LITERAL_ESCAPE_START { + // SAFETY: We just checked that `self.data` contains a `char` in + // its `literal` variant. + return Some(unsafe { self.data.literal }); + } + + None } + /// Returns the printable ASCII characters in the `escape_seq` variant of `self.data` + /// as a string. + /// + /// # Safety + /// + /// - `self.data` must contain printable ASCII characters in its `escape_seq` variant. + /// - `self.alive` must be a valid range for `self.data.escape_seq`. + #[inline] + unsafe fn to_str_unchecked(&self) -> &str { + debug_assert!(self.alive.end <= Self::LITERAL_ESCAPE_START); + + // SAFETY: The caller guarantees `self.data` contains printable ASCII + // characters in its `escape_seq` variant, and `self.alive` is + // a valid range for `self.data.escape_seq`. + unsafe { + self.data + .escape_seq + .get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end)) + .as_str() + } + } +} + +impl EscapeIterInner { pub(crate) fn next(&mut self) -> Option { let i = self.alive.next()?; - // SAFETY: `i` is guaranteed to be a valid index for `self.data`. - unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) } + // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data` + // contains printable ASCII characters in its `escape_seq` + // variant, and `i` is guaranteed to be a valid index for + // `self.data.escape_seq`. + unsafe { Some(self.data.escape_seq.get_unchecked(usize::from(i)).to_u8()) } } pub(crate) fn next_back(&mut self) -> Option { let i = self.alive.next_back()?; - // SAFETY: `i` is guaranteed to be a valid index for `self.data`. - unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) } + // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data` + // contains printable ASCII characters in its `escape_seq` + // variant, and `i` is guaranteed to be a valid index for + // `self.data.escape_seq`. + unsafe { Some(self.data.escape_seq.get_unchecked(usize::from(i)).to_u8()) } } +} - pub(crate) fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { - self.alive.advance_by(n) +impl EscapeIterInner { + // This is the only way to create any `EscapeIterInner` containing a `char` in + // the `literal` variant of its `self.data`, meaning the `AlwaysEscaped` marker + // guarantees that `self.data` contains printable ASCII characters in its + // `escape_seq` variant. + pub(crate) const fn printable(c: char) -> Self { + Self { + data: MaybeEscapedCharacter { literal: c }, + // Uphold the invariant `alive.end > Self::LITERAL_ESCAPE_START`, and ensure + // `len` behaves correctly for iterating through one character literal. + alive: Self::LITERAL_ESCAPE_START..(Self::LITERAL_ESCAPE_START + 1), + escaping: PhantomData, + } } - pub(crate) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero> { - self.alive.advance_back_by(n) + pub(crate) fn next(&mut self) -> Option { + let i = self.alive.next()?; + + if let Some(c) = self.to_char() { + return Some(c); + } + + // SAFETY: At this point, `self.data` must contain printable ASCII + // characters in its `escape_seq` variant, and `i` is + // guaranteed to be a valid index for `self.data.escape_seq`. + Some(char::from(unsafe { self.data.escape_seq.get_unchecked(usize::from(i)).to_u8() })) + } +} + +impl fmt::Display for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data` + // contains printable ASCII chars, and `self.alive` is + // guaranteed to be a valid range for `self.data`. + f.write_str(unsafe { self.to_str_unchecked() }) + } +} + +impl fmt::Display for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(c) = self.to_char() { + return f.write_char(c); + } + + // SAFETY: At this point, `self.data` must contain printable ASCII + // characters in its `escape_seq` variant, and `self.alive` + // is guaranteed to be a valid range for `self.data`. + f.write_str(unsafe { self.to_str_unchecked() }) + } +} + +impl fmt::Debug for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("EscapeIterInner").field(&format_args!("'{}'", self)).finish() + } +} + +impl fmt::Debug for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("EscapeIterInner").field(&format_args!("'{}'", self)).finish() } } diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index b4d9a1b1ca4fd..181ae82959c66 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -308,7 +308,7 @@ impl<'a> fmt::Display for EscapeAscii<'a> { if let Some(&b) = bytes.first() { // guaranteed to be non-empty, better to write it as a str - f.write_str(ascii::escape_default(b).as_str())?; + fmt::Display::fmt(&ascii::escape_default(b), f)?; bytes = &bytes[1..]; } }