-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathbytes_string.rs
292 lines (256 loc) · 8.8 KB
/
bytes_string.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0
use crate::Bytes;
#[cfg(feature = "serde")]
use serde::ser::{Serialize, Serializer};
use std::fmt::{Debug, Formatter};
use std::{borrow::Borrow, hash, str::Utf8Error};
/// A string type backed by a [`Bytes`] buffer.
///
/// The buffer is expected to hold valid UTF-8: the safe constructors either validate their input
/// or start from a `str`/`String`, and `as_str` relies on that without re-checking.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesString {
/// Underlying buffer; expected to contain valid UTF-8.
bytes: Bytes,
}
#[cfg(feature = "serde")]
impl Serialize for BytesString {
    /// Serializes the value as a plain string by handing its `&str` view to
    /// `Serializer::serialize_str`.
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
impl BytesString {
/// Creates a `BytesString` from a slice of bytes.
///
/// This function validates that the provided slice is valid UTF-8. If the slice is not valid
/// UTF-8, an error is returned.
///
/// # Arguments
///
/// * `slice` - A byte slice that will be converted into a `BytesString`.
///
/// # Returns
///
/// A `Result` containing the `BytesString` if the slice is valid UTF-8, or a `Utf8Error` if
/// the slice is not valid UTF-8.
///
/// # Errors
///
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
pub fn from_slice(slice: &[u8]) -> Result<Self, Utf8Error> {
std::str::from_utf8(slice)?;
Ok(Self {
bytes: Bytes::copy_from_slice(slice),
})
}
pub fn from_static(value: &'static str) -> Self {
// SAFETY: This is safe as a str is always a valid UTF-8 slice.
unsafe { Self::from_bytes_unchecked(Bytes::from_static(value.as_bytes())) }
}
pub fn from_string(value: String) -> Self {
// SAFETY: This is safe as a String is always a valid UTF-8 slice.
unsafe { Self::from_bytes_unchecked(Bytes::from_underlying(value)) }
}
pub fn from_cow(cow: std::borrow::Cow<'static, str>) -> Self {
match cow {
std::borrow::Cow::Borrowed(s) => Self::from_static(s),
std::borrow::Cow::Owned(s) => Self::from_string(s),
}
}
/// Creates a `BytesString` from a `tinybytes::Bytes` instance.
///
/// This function validates that the provided `Bytes` instance contains valid UTF-8 data. If the
/// data is not valid UTF-8, an error is returned.
///
/// # Arguments
///
/// * `bytes` - A `tinybytes::Bytes` instance that will be converted into a `BytesString`.
///
/// # Returns
///
/// A `Result` containing the `BytesString` if the bytes are valid UTF-8, or a `Utf8Error` if
/// the bytes are not valid UTF-8.
///
/// # Errors
///
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
pub fn from_bytes(bytes: Bytes) -> Result<Self, Utf8Error> {
std::str::from_utf8(&bytes)?;
Ok(Self { bytes })
}
/// Creates a `BytesString` from a string slice within the given buffer.
///
/// # Arguments
///
/// * `bytes` - A `tinybytes::Bytes` instance that will be converted into a `BytesString`.
/// * `slice` - The string slice pointing into the given bytes that will form the `BytesString`.
pub fn from_bytes_slice(bytes: &Bytes, slice: &str) -> Self {
// SAFETY: This is safe as a str slice is definitely a valid UTF-8 slice.
#[allow(clippy::expect_used)]
unsafe {
Self::from_bytes_unchecked(bytes.slice_ref(slice.as_bytes()).expect("Invalid slice"))
}
}
/// Creates a `BytesString` from a `tinybytes::Bytes` instance without validating the bytes.
///
/// This function does not perform any validation on the provided bytes, and assumes that the
/// bytes are valid UTF-8. If the bytes are not valid UTF-8, the behavior is undefined.
///
/// # Arguments
///
/// * `bytes` - A `tinybytes::Bytes` instance that will be converted into a `BytesString`.
///
/// # Safety
///
/// This function is unsafe because it assumes the bytes are valid UTF-8. If the bytes are not
/// valid UTF-8, the behavior is undefined.
pub unsafe fn from_bytes_unchecked(bytes: Bytes) -> Self {
Self { bytes }
}
/// Returns the string slice representation of the `BytesString` (without validating the bytes).
/// Typically, you should use `from_slice` or `from_bytes` when creating a BytesString to
/// ensure the bytes are valid UTF-8 (if the bytes haven't already been validated by other
/// means) so further validation may be unnecessary.
pub fn as_str(&self) -> &str {
// SAFETY: We assume all BytesStrings are valid UTF-8.
unsafe { std::str::from_utf8_unchecked(&self.bytes) }
}
/// Returns a `String` with a copy of the `BytesString`.
/// This is typically useful when you need to hold the content of a slice for a long time and
/// don't want to prevent the buffer from being dropped earlier.
pub fn copy_to_string(&self) -> String {
self.as_str().to_string()
}
/// Returns `true` if the underlying bytes are empty.
pub fn is_empty(&self) -> bool {
self.bytes.is_empty()
}
}
impl Default for BytesString {
    /// Returns a `BytesString` wrapping `Bytes::empty()`.
    fn default() -> Self {
        let bytes = Bytes::empty();
        Self { bytes }
    }
}
impl Borrow<str> for BytesString {
fn borrow(&self) -> &str {
self.as_str()
}
}
impl AsRef<str> for BytesString {
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl From<String> for BytesString {
fn from(value: String) -> Self {
Self::from_string(value)
}
}
impl From<&'static str> for BytesString {
fn from(value: &'static str) -> Self {
Self::from_static(value)
}
}
// `Hash` is written by hand instead of being derived from `Bytes`: `[u8]` and `str` do not
// produce the same hash, and this type must hash like the `str` it represents.
impl hash::Hash for BytesString {
    /// Feeds the `&str` view of the contents into `state`.
    #[inline]
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        let text = self.as_str();
        hash::Hash::hash(text, state);
    }
}
impl PartialEq<&str> for BytesString {
    /// Compares the contents with a string slice.
    fn eq(&self, other: &&str) -> bool {
        let rhs: &str = other;
        self.as_str() == rhs
    }
}
impl Debug for BytesString {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.serialize_str(self.as_str())
}
}
/// Unit tests for `BytesString` constructors, accessors and trait implementations.
#[cfg(test)]
mod tests {
    use super::*;
    use std::hash::{DefaultHasher, Hash, Hasher};

    #[test]
    fn test_from_slice() {
        let slice = b"hello";
        let bytes_string = BytesString::from_slice(slice).unwrap();
        assert_eq!(bytes_string.as_str(), "hello");
    }

    #[test]
    fn test_from_slice_invalid_utf8() {
        let invalid_utf8_slice = &[0, 159, 146, 150];
        let result = BytesString::from_slice(invalid_utf8_slice);
        assert!(result.is_err());
    }

    #[test]
    fn test_from_bytes() {
        let bytes = Bytes::copy_from_slice(b"world");
        let bytes_string = BytesString::from_bytes(bytes).unwrap();
        assert_eq!(bytes_string.as_str(), "world");
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_from_bytes_invalid_utf8() {
        let invalid_utf8_bytes = Bytes::copy_from_slice(&[0, 159, 146, 150]);
        let result = BytesString::from_bytes(invalid_utf8_bytes);
        assert!(result.is_err());
    }

    #[test]
    fn test_from_bytes_unchecked() {
        let bytes = Bytes::copy_from_slice(b"unchecked");
        let bytes_string = unsafe { BytesString::from_bytes_unchecked(bytes) };
        assert_eq!(bytes_string.as_str(), "unchecked");
    }

    #[test]
    fn test_as_str() {
        let bytes_string = BytesString::from_slice(b"test").unwrap();
        assert_eq!(bytes_string.as_str(), "test");
    }

    // The `Serialize` impl only exists when the `serde` feature is enabled, so this test must be
    // gated the same way or the test build fails without the feature.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialize() {
        let bytes_string = BytesString::from_slice(b"serialize").unwrap();
        let serialized = serde_json::to_string(&bytes_string).unwrap();
        assert_eq!(serialized, "\"serialize\"");
    }

    #[test]
    fn test_default() {
        let bytes_string: BytesString = Default::default();
        assert_eq!(bytes_string.as_str(), "");
    }

    #[test]
    fn test_borrow() {
        let bytes_string = BytesString::from_slice(b"borrow").unwrap();
        let borrowed: &str = bytes_string.borrow();
        assert_eq!(borrowed, "borrow");
    }

    #[test]
    fn test_from_string() {
        let string = String::from("hello");
        let bytes_string = BytesString::from(string);
        assert_eq!(bytes_string.as_str(), "hello");
    }

    #[test]
    fn test_from_static_str() {
        let static_str = "hello";
        let bytes_string = BytesString::from(static_str);
        assert_eq!(bytes_string.as_str(), "hello");
    }

    #[test]
    fn test_from_cow() {
        let borrowed = BytesString::from_cow(std::borrow::Cow::Borrowed("borrowed"));
        assert_eq!(borrowed.as_str(), "borrowed");
        let owned = BytesString::from_cow(std::borrow::Cow::Owned(String::from("owned")));
        assert_eq!(owned.as_str(), "owned");
    }

    /// Hashes any `Hash` value with the standard library's default hasher.
    fn calculate_hash<T: Hash>(t: &T) -> u64 {
        let mut s = DefaultHasher::new();
        t.hash(&mut s);
        s.finish()
    }

    #[test]
    fn test_hash() {
        // A `BytesString` must hash like the equivalent `str`.
        let bytes_string = BytesString::from_slice(b"test hash").unwrap();
        assert_eq!(calculate_hash(&bytes_string), calculate_hash(&"test hash"));
    }

    #[test]
    fn test_eq_str() {
        let bytes_string = BytesString::from("hello");
        assert!(bytes_string == "hello");
        assert!(bytes_string != "world");
    }

    #[test]
    fn test_is_empty() {
        assert!(BytesString::default().is_empty());
        assert!(!BytesString::from("x").is_empty());
    }

    #[test]
    fn test_copy_to_string() {
        let bytes_string = BytesString::from("hello");
        assert_eq!(bytes_string.copy_to_string(), "hello");
    }
}