Skip to content

Commit 0d9afcd

Browse files
committed
Merge core::unicode::str into core::str
And the UnicodeStr trait into StrExt
1 parent 33358dc commit 0d9afcd

File tree

5 files changed

+182
-198
lines changed

5 files changed

+182
-198
lines changed

src/liballoc/str.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
4545
use core::mem;
4646
use core::ptr;
4747
use core::iter::FusedIterator;
48-
use core::unicode::str::{UnicodeStr, Utf16Encoder};
48+
use core::unicode::Utf16Encoder;
4949

5050
use vec_deque::VecDeque;
5151
use borrow::{Borrow, ToOwned};
@@ -74,7 +74,7 @@ pub use core::str::{from_utf8, from_utf8_mut, Chars, CharIndices, Bytes};
7474
#[stable(feature = "rust1", since = "1.0.0")]
7575
pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError};
7676
#[stable(feature = "rust1", since = "1.0.0")]
77-
pub use core::unicode::str::SplitWhitespace;
77+
pub use core::str::SplitWhitespace;
7878
#[stable(feature = "rust1", since = "1.0.0")]
7979
pub use core::str::pattern;
8080

@@ -800,7 +800,7 @@ impl str {
800800
#[stable(feature = "split_whitespace", since = "1.1.0")]
801801
#[inline]
802802
pub fn split_whitespace(&self) -> SplitWhitespace {
803-
UnicodeStr::split_whitespace(self)
803+
StrExt::split_whitespace(self)
804804
}
805805

806806
/// An iterator over the lines of a string, as string slices.
@@ -1570,7 +1570,7 @@ impl str {
15701570
/// ```
15711571
#[stable(feature = "rust1", since = "1.0.0")]
15721572
pub fn trim(&self) -> &str {
1573-
UnicodeStr::trim(self)
1573+
StrExt::trim(self)
15741574
}
15751575

15761576
/// Returns a string slice with leading whitespace removed.
@@ -1606,7 +1606,7 @@ impl str {
16061606
/// ```
16071607
#[stable(feature = "rust1", since = "1.0.0")]
16081608
pub fn trim_left(&self) -> &str {
1609-
UnicodeStr::trim_left(self)
1609+
StrExt::trim_left(self)
16101610
}
16111611

16121612
/// Returns a string slice with trailing whitespace removed.
@@ -1642,7 +1642,7 @@ impl str {
16421642
/// ```
16431643
#[stable(feature = "rust1", since = "1.0.0")]
16441644
pub fn trim_right(&self) -> &str {
1645-
UnicodeStr::trim_right(self)
1645+
StrExt::trim_right(self)
16461646
}
16471647

16481648
/// Returns a string slice with all prefixes and suffixes that match a
@@ -2141,7 +2141,7 @@ impl str {
21412141
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
21422142
#[inline]
21432143
pub fn is_whitespace(&self) -> bool {
2144-
UnicodeStr::is_whitespace(self)
2144+
StrExt::is_whitespace(self)
21452145
}
21462146

21472147
/// Returns true if this `str` is entirely alphanumeric, and false otherwise.
@@ -2160,7 +2160,7 @@ impl str {
21602160
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
21612161
#[inline]
21622162
pub fn is_alphanumeric(&self) -> bool {
2163-
UnicodeStr::is_alphanumeric(self)
2163+
StrExt::is_alphanumeric(self)
21642164
}
21652165

21662166
/// Checks if all characters in this string are within the ASCII range.

src/liballoc/tests/str.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1204,7 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() {
12041204

12051205
#[test]
12061206
fn test_utf16_code_units() {
1207-
use core::unicode::str::Utf16Encoder;
1207+
use core::unicode::Utf16Encoder;
12081208
assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
12091209
[0xE9, 0xD83D, 0xDCA9])
12101210
}

src/libcore/str/mod.rs

+115-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
1919

2020
use char;
2121
use fmt;
22-
use iter::{Map, Cloned, FusedIterator, TrustedLen};
22+
use iter::{Map, Cloned, FusedIterator, TrustedLen, Filter};
2323
use iter_private::TrustedRandomAccess;
2424
use slice::{self, SliceIndex};
2525
use mem;
@@ -2216,6 +2216,18 @@ pub trait StrExt {
22162216
fn is_empty(&self) -> bool;
22172217
#[stable(feature = "core", since = "1.6.0")]
22182218
fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
2219+
#[stable(feature = "split_whitespace", since = "1.1.0")]
2220+
fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>;
2221+
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
2222+
fn is_whitespace(&self) -> bool;
2223+
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
2224+
fn is_alphanumeric(&self) -> bool;
2225+
#[stable(feature = "rust1", since = "1.0.0")]
2226+
fn trim(&self) -> &str;
2227+
#[stable(feature = "rust1", since = "1.0.0")]
2228+
fn trim_left(&self) -> &str;
2229+
#[stable(feature = "rust1", since = "1.0.0")]
2230+
fn trim_right(&self) -> &str;
22192231
}
22202232

22212233
// truncate `&str` to length at most equal to `max`
@@ -2536,6 +2548,36 @@ impl StrExt for str {
25362548

25372549
#[inline]
25382550
fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
2551+
2552+
#[inline]
2553+
fn split_whitespace(&self) -> SplitWhitespace {
2554+
SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
2555+
}
2556+
2557+
#[inline]
2558+
fn is_whitespace(&self) -> bool {
2559+
self.chars().all(|c| c.is_whitespace())
2560+
}
2561+
2562+
#[inline]
2563+
fn is_alphanumeric(&self) -> bool {
2564+
self.chars().all(|c| c.is_alphanumeric())
2565+
}
2566+
2567+
#[inline]
2568+
fn trim(&self) -> &str {
2569+
self.trim_matches(|c: char| c.is_whitespace())
2570+
}
2571+
2572+
#[inline]
2573+
fn trim_left(&self) -> &str {
2574+
self.trim_left_matches(|c: char| c.is_whitespace())
2575+
}
2576+
2577+
#[inline]
2578+
fn trim_right(&self) -> &str {
2579+
self.trim_right_matches(|c: char| c.is_whitespace())
2580+
}
25392581
}
25402582

25412583
#[stable(feature = "rust1", since = "1.0.0")]
@@ -2551,3 +2593,75 @@ impl<'a> Default for &'a str {
25512593
/// Creates an empty str
25522594
fn default() -> &'a str { "" }
25532595
}
2596+
2597+
/// An iterator over the non-whitespace substrings of a string,
2598+
/// separated by any amount of whitespace.
2599+
///
2600+
/// This struct is created by the [`split_whitespace`] method on [`str`].
2601+
/// See its documentation for more.
2602+
///
2603+
/// [`split_whitespace`]: ../../std/primitive.str.html#method.split_whitespace
2604+
/// [`str`]: ../../std/primitive.str.html
2605+
#[stable(feature = "split_whitespace", since = "1.1.0")]
2606+
#[derive(Clone, Debug)]
2607+
pub struct SplitWhitespace<'a> {
2608+
inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
2609+
}
2610+
2611+
#[derive(Clone)]
2612+
struct IsWhitespace;
2613+
2614+
impl FnOnce<(char, )> for IsWhitespace {
2615+
type Output = bool;
2616+
2617+
#[inline]
2618+
extern "rust-call" fn call_once(mut self, arg: (char, )) -> bool {
2619+
self.call_mut(arg)
2620+
}
2621+
}
2622+
2623+
impl FnMut<(char, )> for IsWhitespace {
2624+
#[inline]
2625+
extern "rust-call" fn call_mut(&mut self, arg: (char, )) -> bool {
2626+
arg.0.is_whitespace()
2627+
}
2628+
}
2629+
2630+
#[derive(Clone)]
2631+
struct IsNotEmpty;
2632+
2633+
impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty {
2634+
type Output = bool;
2635+
2636+
#[inline]
2637+
extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool {
2638+
self.call_mut(arg)
2639+
}
2640+
}
2641+
2642+
impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty {
2643+
#[inline]
2644+
extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool {
2645+
!arg.0.is_empty()
2646+
}
2647+
}
2648+
2649+
2650+
#[stable(feature = "split_whitespace", since = "1.1.0")]
2651+
impl<'a> Iterator for SplitWhitespace<'a> {
2652+
type Item = &'a str;
2653+
2654+
fn next(&mut self) -> Option<&'a str> {
2655+
self.inner.next()
2656+
}
2657+
}
2658+
2659+
#[stable(feature = "split_whitespace", since = "1.1.0")]
2660+
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
2661+
fn next_back(&mut self) -> Option<&'a str> {
2662+
self.inner.next_back()
2663+
}
2664+
}
2665+
2666+
#[stable(feature = "fused", since = "1.26.0")]
2667+
impl<'a> FusedIterator for SplitWhitespace<'a> {}

src/libcore/unicode/mod.rs

+58-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ mod bool_trie;
1515
pub(crate) mod tables;
1616
pub(crate) mod version;
1717

18-
pub mod str;
19-
2018
// For use in liballoc, not re-exported in libstd.
2119
pub mod derived_property {
2220
pub use unicode::tables::derived_property::{Case_Ignorable, Cased};
@@ -26,3 +24,61 @@ pub mod derived_property {
2624
pub mod property {
2725
pub use unicode::tables::property::Pattern_White_Space;
2826
}
27+
28+
use iter::FusedIterator;
29+
30+
/// Iterator adaptor for encoding `char`s to UTF-16.
///
/// Each `char` pulled from the wrapped iterator is yielded either as a single
/// `u16` code unit or, when its UTF-16 encoding needs two units, as two
/// consecutive `u16`s (a surrogate pair).
#[derive(Clone)]
#[allow(missing_debug_implementations)]
pub struct Utf16Encoder<I> {
    /// Source of the `char`s that still have to be encoded.
    chars: I,
    /// Second code unit of a pair whose first unit has already been yielded.
    /// `0` means "nothing pending"; a trailing surrogate is always in
    /// `0xDC00..=0xDFFF`, so `0` can never be a real pending value.
    extra: u16,
}

impl<I> Utf16Encoder<I> {
    /// Create a UTF-16 encoder from any `char` iterator.
    pub fn new(chars: I) -> Utf16Encoder<I>
        where I: Iterator<Item = char>
    {
        Utf16Encoder {
            chars,
            extra: 0,
        }
    }
}

impl<I> Iterator for Utf16Encoder<I>
    where I: Iterator<Item = char>
{
    type Item = u16;

    /// Yields the next UTF-16 code unit, or `None` once the wrapped iterator
    /// is exhausted and no trailing surrogate is pending.
    #[inline]
    fn next(&mut self) -> Option<u16> {
        // Finish a surrogate pair before pulling a new `char`.
        if self.extra != 0 {
            let tmp = self.extra;
            self.extra = 0;
            return Some(tmp);
        }

        let mut buf = [0; 2];
        self.chars.next().map(|ch| {
            let n = ch.encode_utf16(&mut buf).len();
            if n == 2 {
                // Stash the second unit; it is returned by the next call.
                self.extra = buf[1];
            }
            buf[0]
        })
    }

    /// Returns bounds on the number of code units still to be yielded.
    ///
    /// Every remaining `char` becomes one or two `u16`s, and a buffered
    /// trailing surrogate contributes exactly one more item to both bounds.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (low, high) = self.chars.size_hint();
        // The unit parked in `extra` is not visible to the wrapped iterator,
        // so it must be counted here; otherwise the upper bound could be
        // smaller than the number of items actually left.
        let pending = if self.extra != 0 { 1 } else { 0 };
        let lower = low.saturating_add(pending);
        let upper = high
            .and_then(|n| n.checked_mul(2))
            .and_then(|n| n.checked_add(pending));
        (lower, upper)
    }
}

impl<I> FusedIterator for Utf16Encoder<I>
    where I: FusedIterator<Item = char> {}

0 commit comments

Comments
 (0)