Skip to content

Commit ae30dc9

Browse files
committed
add MAX_LEN_UTF8 and MAX_LEN_UTF16 constants
1 parent ef32456 commit ae30dc9

File tree

14 files changed

+48 -17 lines changed

library/alloc/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
#![feature(assert_matches)]
110110
#![feature(async_fn_traits)]
111111
#![feature(async_iterator)]
112+
#![feature(char_max_len)]
112113
#![feature(coerce_unsized)]
113114
#![feature(const_align_of_val)]
114115
#![feature(const_box)]

library/alloc/src/string.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
4343
#![stable(feature = "rust1", since = "1.0.0")]
4444

45+
use core::char::MAX_LEN_UTF8;
4546
use core::error::Error;
4647
use core::fmt;
4748
use core::hash;
@@ -1343,9 +1344,10 @@ impl String {
13431344
#[inline]
13441345
#[stable(feature = "rust1", since = "1.0.0")]
13451346
pub fn push(&mut self, ch: char) {
1347+
13461348
match ch.len_utf8() {
13471349
1 => self.vec.push(ch as u8),
1348-
_ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
1350+
_ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; MAX_LEN_UTF8]).as_bytes()),
13491351
}
13501352
}
13511353

@@ -1644,7 +1646,7 @@ impl String {
16441646
#[rustc_confusables("set")]
16451647
pub fn insert(&mut self, idx: usize, ch: char) {
16461648
assert!(self.is_char_boundary(idx));
1647-
let mut bits = [0; 4];
1649+
let mut bits = [0; MAX_LEN_UTF8];
16481650
let bits = ch.encode_utf8(&mut bits).as_bytes();
16491651

16501652
unsafe {
@@ -2633,7 +2635,7 @@ impl ToString for core::ascii::Char {
26332635
impl ToString for char {
26342636
#[inline]
26352637
fn to_string(&self) -> String {
2636-
String::from(self.encode_utf8(&mut [0; 4]))
2638+
String::from(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))
26372639
}
26382640
}
26392641

library/alloc/tests/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#![feature(iter_array_chunks)]
44
#![feature(assert_matches)]
55
#![feature(btree_extract_if)]
6+
#![feature(char_max_len)]
67
#![feature(cow_is_borrowed)]
78
#![feature(const_cow_is_borrowed)]
89
#![feature(const_heap)]

library/alloc/tests/str.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
use std::assert_matches::assert_matches;
44
use std::borrow::Cow;
5+
use std::char::MAX_LEN_UTF8;
56
use std::cmp::Ordering::{Equal, Greater, Less};
67
use std::str::{from_utf8, from_utf8_unchecked};
78

@@ -1230,7 +1231,7 @@ fn test_to_uppercase_rev_iterator() {
12301231
#[test]
12311232
#[cfg_attr(miri, ignore)] // Miri is too slow
12321233
fn test_chars_decoding() {
1233-
let mut bytes = [0; 4];
1234+
let mut bytes = [0; MAX_LEN_UTF8];
12341235
for c in (0..0x110000).filter_map(std::char::from_u32) {
12351236
let s = c.encode_utf8(&mut bytes);
12361237
if Some(c) != s.chars().next() {
@@ -1242,7 +1243,7 @@ fn test_chars_decoding() {
12421243
#[test]
12431244
#[cfg_attr(miri, ignore)] // Miri is too slow
12441245
fn test_chars_rev_decoding() {
1245-
let mut bytes = [0; 4];
1246+
let mut bytes = [0; MAX_LEN_UTF8];
12461247
for c in (0..0x110000).filter_map(std::char::from_u32) {
12471248
let s = c.encode_utf8(&mut bytes);
12481249
if Some(c) != s.chars().rev().next() {

library/core/src/char/methods.rs

+10
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ impl char {
7474
#[stable(feature = "assoc_char_consts", since = "1.52.0")]
7575
pub const MAX: char = '\u{10ffff}';
7676

77+
/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
78+
/// UTF-8 encoding.
79+
#[unstable(feature = "char_max_len", issue = "none")]
80+
pub const MAX_LEN_UTF8: usize = 4;
81+
82+
/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
83+
/// to UTF-16 encoding.
84+
#[unstable(feature = "char_max_len", issue = "none")]
85+
pub const MAX_LEN_UTF16: usize = 2;
86+
7787
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
7888
/// decoding error.
7989
///

library/core/src/char/mod.rs

+10
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,16 @@ const MAX_THREE_B: u32 = 0x10000;
9393
#[stable(feature = "rust1", since = "1.0.0")]
9494
pub const MAX: char = char::MAX;
9595

96+
/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
97+
/// UTF-8 encoding.
98+
#[unstable(feature = "char_max_len", issue = "none")]
99+
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;
100+
101+
/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
102+
/// to UTF-16 encoding.
103+
#[unstable(feature = "char_max_len", issue = "none")]
104+
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;
105+
96106
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
97107
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
98108
#[stable(feature = "decode_utf16", since = "1.9.0")]

library/core/src/fmt/mod.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#![stable(feature = "rust1", since = "1.0.0")]
44

55
use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell};
6-
use crate::char::EscapeDebugExtArgs;
6+
use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8};
77
use crate::iter;
88
use crate::marker::PhantomData;
99
use crate::mem;
@@ -164,7 +164,7 @@ pub trait Write {
164164
/// ```
165165
#[stable(feature = "fmt_write_char", since = "1.1.0")]
166166
fn write_char(&mut self, c: char) -> Result {
167-
self.write_str(c.encode_utf8(&mut [0; 4]))
167+
self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8]))
168168
}
169169

170170
/// Glue for usage of the [`write!`] macro with implementors of this trait.
@@ -2387,7 +2387,7 @@ impl Display for char {
23872387
if f.width.is_none() && f.precision.is_none() {
23882388
f.write_char(*self)
23892389
} else {
2390-
f.pad(self.encode_utf8(&mut [0; 4]))
2390+
f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))
23912391
}
23922392
}
23932393
}

library/core/src/str/pattern.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
issue = "27721"
3939
)]
4040

41+
use crate::char::MAX_LEN_UTF8;
4142
use crate::cmp;
4243
use crate::cmp::Ordering;
4344
use crate::convert::TryInto as _;
@@ -548,7 +549,7 @@ impl<'a> Pattern<'a> for char {
548549

549550
#[inline]
550551
fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
551-
let mut utf8_encoded = [0; 4];
552+
let mut utf8_encoded = [0; MAX_LEN_UTF8];
552553
let utf8_size = self
553554
.encode_utf8(&mut utf8_encoded)
554555
.len()

library/core/tests/char.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::char::MAX_LEN_UTF8;
12
use std::str::FromStr;
23
use std::{char, str};
34

@@ -259,7 +260,7 @@ fn test_escape_unicode() {
259260
#[test]
260261
fn test_encode_utf8() {
261262
fn check(input: char, expect: &[u8]) {
262-
let mut buf = [0; 4];
263+
let mut buf = [0; MAX_LEN_UTF8];
263264
let ptr = buf.as_ptr();
264265
let s = input.encode_utf8(&mut buf);
265266
assert_eq!(s.as_ptr() as usize, ptr as usize);

library/core/tests/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#![feature(async_iterator)]
88
#![feature(bigint_helper_methods)]
99
#![feature(cell_update)]
10+
#![feature(char_max_len)]
1011
#![feature(const_align_offset)]
1112
#![feature(const_align_of_val_raw)]
1213
#![feature(const_black_box)]

library/std/src/fs/tests.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::io::prelude::*;
22

3+
use crate::char::MAX_LEN_UTF8;
34
use crate::env;
45
use crate::fs::{self, File, FileTimes, OpenOptions};
56
use crate::io::{BorrowedBuf, ErrorKind, SeekFrom};
@@ -176,7 +177,7 @@ fn file_test_io_non_positional_read() {
176177
#[test]
177178
fn file_test_io_seek_and_tell_smoke_test() {
178179
let message = "ten-four";
179-
let mut read_mem = [0; 4];
180+
let mut read_mem = [0; MAX_LEN_UTF8];
180181
let set_cursor = 4 as u64;
181182
let tell_pos_pre_read;
182183
let tell_pos_post_read;
@@ -231,7 +232,7 @@ fn file_test_io_seek_shakedown() {
231232
let chunk_one: &str = "qwer";
232233
let chunk_two: &str = "asdf";
233234
let chunk_three: &str = "zxcv";
234-
let mut read_mem = [0; 4];
235+
let mut read_mem = [0; MAX_LEN_UTF8];
235236
let tmpdir = tmpdir();
236237
let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt");
237238
{
@@ -496,7 +497,7 @@ fn file_test_directoryinfo_readdir() {
496497
check!(w.write(msg));
497498
}
498499
let files = check!(fs::read_dir(dir));
499-
let mut mem = [0; 4];
500+
let mut mem = [0; MAX_LEN_UTF8];
500501
for f in files {
501502
let f = f.unwrap().path();
502503
{

library/std/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@
279279
#![feature(cfg_sanitizer_cfi)]
280280
#![feature(cfg_target_thread_local)]
281281
#![feature(cfi_encoding)]
282+
#![feature(char_max_len)]
282283
#![feature(concat_idents)]
283284
#![feature(const_mut_refs)]
284285
#![feature(const_trait_impl)]

library/std/src/sys/pal/windows/stdio.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::str;
1010
use crate::sys::c;
1111
use crate::sys::cvt;
1212
use crate::sys::handle::Handle;
13+
use core::char::MAX_LEN_UTF8;
1314
use core::str::utf8_char_width;
1415

1516
#[cfg(test)]
@@ -417,7 +418,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
417418

418419
impl IncompleteUtf8 {
419420
pub const fn new() -> IncompleteUtf8 {
420-
IncompleteUtf8 { bytes: [0; 4], len: 0 }
421+
IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 }
421422
}
422423
}
423424

library/std/src/sys_common/wtf8.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#[cfg(test)]
1919
mod tests;
2020

21-
use core::char::{encode_utf16_raw, encode_utf8_raw};
21+
use core::char::{encode_utf16_raw, encode_utf8_raw, MAX_LEN_UTF16, MAX_LEN_UTF8};
2222
use core::str::next_code_point;
2323

2424
use crate::borrow::Cow;
@@ -243,7 +243,7 @@ impl Wtf8Buf {
243243
/// Copied from String::push
244244
/// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
245245
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
246-
let mut bytes = [0; 4];
246+
let mut bytes = [0; MAX_LEN_UTF8];
247247
let bytes = encode_utf8_raw(code_point.value, &mut bytes);
248248
self.bytes.extend_from_slice(bytes)
249249
}
@@ -984,7 +984,7 @@ impl<'a> Iterator for EncodeWide<'a> {
984984
return Some(tmp);
985985
}
986986

987-
let mut buf = [0; 2];
987+
let mut buf = [0; MAX_LEN_UTF16];
988988
self.code_points.next().map(|code_point| {
989989
let n = encode_utf16_raw(code_point.value, &mut buf).len();
990990
if n == 2 {

0 commit comments

Comments
 (0)