Update documentation and fix several issues from clippy and rustdoc

TheVeryDarkness · Oct 10, 2024 · 857ba06 · 857ba06
1 parent 03f81c6
commit 857ba06
Show file tree

Hide file tree

Showing 5 changed files with 56 additions and 13 deletions.
diff --git a/src/stream/traits.rs b/src/stream/traits.rs
@@ -30,10 +30,11 @@ fn trim_matches<'s>(s: &'s str, white: &[FixedUtf8Char]) -> &'s str {
 ///
 /// It provides a way to read:
 ///
-/// - A single non-ASCII-whitespace character ([BufReadExt::try_get_non_ws]),
-/// - A single ASCII-white-space-separated string ([BufReadExt::try_get_string_some]),
+/// - A single non-`skipped` character ([BufReadExt::try_get_non]),
+/// - A single string separated by `skipped` ([BufReadExt::try_get_string_some]),
 /// - A single non-empty line ([BufReadExt::try_get_line_some]),
 /// - Or just the remained line ([BufReadExt::try_get_line]).
+/// - ...
 ///
 /// ASCII whitespace characters here are `' '`, `'\t'`, `'\n'`, and `'\r'`.
 ///

diff --git a/src/utf8char/extensible.rs b/src/utf8char/extensible.rs
@@ -11,6 +11,10 @@ use std::mem::transmute;
 /// - `11110000..=11110111`: 4 bytes
 /// - `11111000..=11111011`: 5 bytes (not valid)
 /// - `11111100..=11111101`: 6 bytes (not valid)
+///
+/// # Invariants
+///
+/// - The byte slice is a single valid UTF-8 character.
 #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 pub struct Utf8Char {
     bytes: [u8],
@@ -32,14 +36,26 @@ impl AsRef<str> for Utf8Char {
 
 impl Utf8Char {
     /// Create a new `Utf8Char` from a byte array.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because it does not check if the byte array is a valid UTF-8 character.
     pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
         debug_assert!(std::str::from_utf8(bytes).is_ok());
+        debug_assert!(!bytes.is_empty());
+        debug_assert!(utf8_len_from_first_byte(bytes[0]) == bytes.len());
         transmute(bytes)
     }
     /// Get the length in bytes of the UTF-8 character.
     pub const fn len(&self) -> usize {
         self.bytes.len()
     }
+    /// Check if the UTF-8 character is empty.
+    ///
+    /// This function always returns `false`.
+    pub const fn is_empty(&self) -> bool {
+        false
+    }
     /// Get the bytes of the UTF-8 character.
     pub const fn as_bytes(&self) -> &[u8] {
         &self.bytes
@@ -54,7 +70,7 @@ impl Utf8Char {
     ///
     /// Returns `None` if the string is empty.
     pub fn from_first_char(s: &str) -> Option<&Self> {
-        let byte = s.as_bytes().get(0)?;
+        let byte = s.as_bytes().first()?;
         let l = unsafe { utf8_len_from_first_byte(*byte) };
         let bytes: &Self = unsafe { transmute(s.as_bytes().get(0..l)?) };
         Some(bytes)

diff --git a/src/utf8char/fixed.rs b/src/utf8char/fixed.rs
@@ -1,3 +1,5 @@
+use crate::utf8char::utf8_len_from_first_byte;
+
 /// A UTF-8 character that is fixed in size.
 ///
 /// 10xxxxxx: continuation byte
@@ -8,6 +10,10 @@
 /// - `11110000..=11110111`: 4 bytes
 /// - `11111000..=11111011`: 5 bytes (not valid)
 /// - `11111100..=11111101`: 6 bytes (not valid)
+///
+/// # Invariants
+///
+/// - The byte array is a single valid UTF-8 character.
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 pub struct FixedUtf8Char {
     bytes: [u8; 4],
@@ -35,13 +41,27 @@ impl From<char> for FixedUtf8Char {
 
 impl FixedUtf8Char {
     /// Create a new `FixedUtf8Char` from a byte array.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because it does not check if the byte array is a valid UTF-8 character.
     pub const unsafe fn from_bytes_unchecked(bytes: [u8; 4]) -> Self {
         debug_assert!(std::str::from_utf8(&bytes).is_ok());
+        debug_assert!(bytes[0] > 0);
+        debug_assert!(utf8_len_from_first_byte(bytes[0]) <= 1 || bytes[1] == 0);
+        debug_assert!(utf8_len_from_first_byte(bytes[0]) <= 2 || bytes[2] == 0);
+        debug_assert!(utf8_len_from_first_byte(bytes[0]) <= 3 || bytes[3] == 0);
         Self { bytes }
     }
     /// Get the length in bytes of the UTF-8 character.
     pub const fn len(&self) -> usize {
-        unsafe { super::utf8_len_from_first_byte(self.bytes[0]) }
+        unsafe { utf8_len_from_first_byte(self.bytes[0]) }
+    }
+    /// Check if the UTF-8 character is empty.
+    ///
+    /// This function always returns `false`.
+    pub const fn is_empty(&self) -> bool {
+        false
     }
     /// Get the bytes of the UTF-8 character.
     pub fn as_bytes(&self) -> &[u8] {
@@ -58,8 +78,8 @@ impl FixedUtf8Char {
     /// Returns `None` if the string is empty.
     pub fn from_first_char(s: &str) -> Option<Self> {
         let mut bytes = [0; 4];
-        let byte = s.as_bytes().get(0)?;
-        let l = unsafe { super::utf8_len_from_first_byte(*byte) };
+        let byte = s.as_bytes().first()?;
+        let l = unsafe { utf8_len_from_first_byte(*byte) };
         bytes[0..l].copy_from_slice(s.as_bytes().get(0..l)?);
         Some(Self { bytes })
     }
@@ -75,7 +95,7 @@ impl PartialEq<char> for FixedUtf8Char {
 
 impl PartialEq<FixedUtf8Char> for char {
     fn eq(&self, other: &FixedUtf8Char) -> bool {
-        <FixedUtf8Char as PartialEq<char>>::eq(other, &self)
+        <FixedUtf8Char as PartialEq<char>>::eq(other, self)
     }
 }
 

diff --git a/src/utf8char/iter_extensible.rs b/src/utf8char/iter_extensible.rs
@@ -14,6 +14,10 @@ impl<'a> IterUtf8Char<'a> {
         Self { bytes }
     }
     /// Create a new `IterUtf8Char` from a byte slice.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because it does not check if the byte slice is a valid UTF-8 string.
     pub const unsafe fn new_from_bytes_unchecked(bytes: &'a [u8]) -> Self {
         Self { bytes }
     }
@@ -23,9 +27,9 @@ impl<'a> Iterator for IterUtf8Char<'a> {
     type Item = &'a Utf8Char;
 
     fn next(&mut self) -> Option<Self::Item> {
-        let byte = self.bytes.get(0)?;
+        let byte = self.bytes.first()?;
         let l = unsafe { super::utf8_len_from_first_byte(*byte) };
-        let c = unsafe { transmute(self.bytes.get(0..l)?) };
+        let c: &Utf8Char = unsafe { transmute(self.bytes.get(0..l)?) };
         self.bytes = &self.bytes[l..];
         Some(c)
     }

diff --git a/src/utf8char/iter_fixed.rs b/src/utf8char/iter_fixed.rs
@@ -14,6 +14,10 @@ impl<'a> IterFixedUtf8Char<'a> {
         Self { bytes }
     }
     /// Create a new `IterFixedUtf8Char` from a byte slice.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because it does not check if the byte slice is a valid UTF-8 string.
     pub const unsafe fn new_from_bytes_unchecked(bytes: &'a [u8]) -> Self {
         Self { bytes }
     }
@@ -23,9 +27,7 @@ impl<'a> Iterator for IterFixedUtf8Char<'a> {
     type Item = FixedUtf8Char;
 
     fn next(&mut self) -> Option<Self::Item> {
-        FixedUtf8Char::from_first_char(unsafe { from_utf8_unchecked(self.bytes) }).map(|c| {
-            self.bytes = &self.bytes[c.len()..];
-            c
-        })
+        FixedUtf8Char::from_first_char(unsafe { from_utf8_unchecked(self.bytes) })
+            .inspect(|c| self.bytes = &self.bytes[c.len()..])
     }
 }