Skip to content

Commit

Permalink
Update documentation and fixed several issue from clippy and rustdoc
Browse files Browse the repository at this point in the history
  • Loading branch information
TheVeryDarkness committed Oct 10, 2024
1 parent 03f81c6 commit 857ba06
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 13 deletions.
5 changes: 3 additions & 2 deletions src/stream/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ fn trim_matches<'s>(s: &'s str, white: &[FixedUtf8Char]) -> &'s str {
///
/// It provides a way to read:
///
/// - A single non-ASCII-whitespace character ([BufReadExt::try_get_non_ws]),
/// - A single ASCII-white-space-separated string ([BufReadExt::try_get_string_some]),
/// - A single non-`skipped` character ([BufReadExt::try_get_non]),
/// - A single string separated by `skipped` ([BufReadExt::try_get_string_some]),
/// - A single non-empty line ([BufReadExt::try_get_line_some]),
/// - Or just the remainder of the current line ([BufReadExt::try_get_line]).
/// - ...
///
/// ASCII whitespace characters here are `' '`, `'\t'`, `'\n'`, and `'\r'`.
///
Expand Down
18 changes: 17 additions & 1 deletion src/utf8char/extensible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ use std::mem::transmute;
/// - `11110000..=11110111`: 4 bytes
/// - `11111000..=11111011`: 5 bytes (not valid)
/// - `11111100..=11111101`: 6 bytes (not valid)
///
/// # Invariants
///
/// - The byte slice is a single valid UTF-8 character.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct Utf8Char {
bytes: [u8],
Expand All @@ -32,14 +36,26 @@ impl AsRef<str> for Utf8Char {

impl Utf8Char {
/// Create a new `Utf8Char` from a byte slice without validating it.
///
/// # Safety
///
/// The caller must ensure that `bytes` is exactly one valid UTF-8 encoded
/// character: it must be non-empty, valid UTF-8, and its length must equal
/// the length implied by its first byte. These conditions are only checked
/// with `debug_assert!`, so they are not checked at all in builds without
/// debug assertions.
pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
// Debug-only checks of the caller's contract.
debug_assert!(std::str::from_utf8(bytes).is_ok());
debug_assert!(!bytes.is_empty());
debug_assert!(utf8_len_from_first_byte(bytes[0]) == bytes.len());
// Reinterprets the `&[u8]` as a `&Utf8Char`, whose only field is `bytes: [u8]`.
// NOTE(review): no `#[repr(transparent)]` is visible on `Utf8Char` in this
// view; this relies on the struct having the same layout as `[u8]` - confirm.
transmute(bytes)
}
/// Get the length in bytes of the UTF-8 character.
///
/// This is the length of the underlying byte slice.
pub const fn len(&self) -> usize {
self.bytes.len()
}
/// Check if the UTF-8 character is empty.
///
/// This function always returns `false`: by the type's invariant the byte
/// slice holds one whole character, so it is never empty.
pub const fn is_empty(&self) -> bool {
false
}
/// Get the bytes of the UTF-8 character.
pub const fn as_bytes(&self) -> &[u8] {
&self.bytes
Expand All @@ -54,7 +70,7 @@ impl Utf8Char {
///
/// Returns `None` if the string is empty.
pub fn from_first_char(s: &str) -> Option<&Self> {
let byte = s.as_bytes().get(0)?;
let byte = s.as_bytes().first()?;
let l = unsafe { utf8_len_from_first_byte(*byte) };
let bytes: &Self = unsafe { transmute(s.as_bytes().get(0..l)?) };
Some(bytes)
Expand Down
28 changes: 24 additions & 4 deletions src/utf8char/fixed.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::utf8char::utf8_len_from_first_byte;

/// A UTF-8 character that is fixed in size.
///
/// 10xxxxxx: continuation byte
Expand All @@ -8,6 +10,10 @@
/// - `11110000..=11110111`: 4 bytes
/// - `11111000..=11111011`: 5 bytes (not valid)
/// - `11111100..=11111101`: 6 bytes (not valid)
///
/// # Invariants
///
/// - The byte array holds a single valid UTF-8 character starting at index 0;
///   any bytes after the encoded character are zero.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct FixedUtf8Char {
bytes: [u8; 4],
Expand Down Expand Up @@ -35,13 +41,27 @@ impl From<char> for FixedUtf8Char {

impl FixedUtf8Char {
/// Create a new `FixedUtf8Char` from a 4-byte array without validating it.
///
/// # Safety
///
/// The caller must ensure that `bytes` holds exactly one valid UTF-8 encoded
/// character starting at index 0, that the first byte is non-zero, and that
/// every byte after the encoded character is `0`. These conditions are only
/// checked with `debug_assert!`, so they are not checked at all in builds
/// without debug assertions.
pub const unsafe fn from_bytes_unchecked(bytes: [u8; 4]) -> Self {
// Debug-only checks of the caller's contract.
debug_assert!(std::str::from_utf8(&bytes).is_ok());
// A zero first byte is rejected.
debug_assert!(bytes[0] > 0);
// Every byte past the encoded length must be zero padding.
debug_assert!(utf8_len_from_first_byte(bytes[0]) <= 1 || bytes[1] == 0);
debug_assert!(utf8_len_from_first_byte(bytes[0]) <= 2 || bytes[2] == 0);
debug_assert!(utf8_len_from_first_byte(bytes[0]) <= 3 || bytes[3] == 0);
Self { bytes }
}
/// Get the length in bytes of the UTF-8 character.
pub const fn len(&self) -> usize {
unsafe { super::utf8_len_from_first_byte(self.bytes[0]) }
unsafe { utf8_len_from_first_byte(self.bytes[0]) }
}
/// Check if the UTF-8 character is empty.
///
/// This function always returns `false`: by the type's invariant the array
/// always holds one whole character.
pub const fn is_empty(&self) -> bool {
false
}
/// Get the bytes of the UTF-8 character.
pub fn as_bytes(&self) -> &[u8] {
Expand All @@ -58,8 +78,8 @@ impl FixedUtf8Char {
/// Returns `None` if the string is empty.
pub fn from_first_char(s: &str) -> Option<Self> {
    let src = s.as_bytes();
    // An empty string has no first character.
    let first = *src.first()?;
    // SAFETY: `first` is the leading byte of a `&str`, which is guaranteed to
    // be valid UTF-8 - presumably the precondition of
    // `utf8_len_from_first_byte`; confirm against its definition.
    let l = unsafe { utf8_len_from_first_byte(first) };
    // Copy the encoded character to the front; the remaining bytes stay zero.
    let mut bytes = [0; 4];
    bytes[0..l].copy_from_slice(src.get(0..l)?);
    Some(Self { bytes })
}
Expand All @@ -75,7 +95,7 @@ impl PartialEq<char> for FixedUtf8Char {

/// Symmetric counterpart of `FixedUtf8Char == char`, so that a `char` can
/// appear on the left-hand side of the comparison.
impl PartialEq<FixedUtf8Char> for char {
    fn eq(&self, other: &FixedUtf8Char) -> bool {
        // Delegate to the `FixedUtf8Char: PartialEq<char>` impl. `self` is
        // already a `&char`, so it is passed as-is without a further borrow.
        <FixedUtf8Char as PartialEq<char>>::eq(other, self)
    }
}

Expand Down
8 changes: 6 additions & 2 deletions src/utf8char/iter_extensible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ impl<'a> IterUtf8Char<'a> {
Self { bytes }
}
/// Create a new `IterUtf8Char` from a byte slice without validating it.
///
/// # Safety
///
/// The caller must ensure that `bytes` is a valid UTF-8 string; no check is
/// performed here.
pub const unsafe fn new_from_bytes_unchecked(bytes: &'a [u8]) -> Self {
Self { bytes }
}
Expand All @@ -23,9 +27,9 @@ impl<'a> Iterator for IterUtf8Char<'a> {
type Item = &'a Utf8Char;

fn next(&mut self) -> Option<Self::Item> {
    // `None` once every byte has been consumed.
    let byte = self.bytes.first()?;
    // SAFETY: `self.bytes` is expected to be valid UTF-8 positioned at a
    // character boundary, so `*byte` is a leading byte - presumably the
    // precondition of `utf8_len_from_first_byte`; confirm against its definition.
    let l = unsafe { super::utf8_len_from_first_byte(*byte) };
    // Iteration also ends if fewer than `l` bytes remain.
    // SAFETY: the sub-slice is reinterpreted as a `Utf8Char`, which wraps a
    // `[u8]`; this relies on the two having the same layout - confirm.
    let c: &Utf8Char = unsafe { transmute(self.bytes.get(0..l)?) };
    // Advance past the character just yielded.
    self.bytes = &self.bytes[l..];
    Some(c)
}
Expand Down
10 changes: 6 additions & 4 deletions src/utf8char/iter_fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ impl<'a> IterFixedUtf8Char<'a> {
Self { bytes }
}
/// Create a new `IterFixedUtf8Char` from a byte slice without validating it.
///
/// # Safety
///
/// The caller must ensure that `bytes` is a valid UTF-8 string; no check is
/// performed here.
pub const unsafe fn new_from_bytes_unchecked(bytes: &'a [u8]) -> Self {
Self { bytes }
}
Expand All @@ -23,9 +27,7 @@ impl<'a> Iterator for IterFixedUtf8Char<'a> {
type Item = FixedUtf8Char;

fn next(&mut self) -> Option<Self::Item> {
    // SAFETY: `self.bytes` is expected to be valid UTF-8 - guaranteed by the
    // caller of `new_from_bytes_unchecked`, and presumably by the safe
    // constructor as well; confirm.
    let rest = unsafe { from_utf8_unchecked(self.bytes) };
    // `None` once the remaining input is empty.
    let c = FixedUtf8Char::from_first_char(rest)?;
    // Advance past the bytes of the character just decoded.
    self.bytes = &self.bytes[c.len()..];
    Some(c)
}
}

0 comments on commit 857ba06

Please sign in to comment.