From 77f8db367530d29681fa921eb563a7be760531e7 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Fri, 8 Mar 2024 16:22:40 +0100 Subject: [PATCH 1/4] New `Itertools::tail` The `.tail(1)` test is for code coverage. --- src/lib.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ tests/quick.rs | 5 +++++ 2 files changed, 56 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ec374c469..528a1cfa8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3144,6 +3144,57 @@ pub trait Itertools: Iterator { self.k_largest_by(k, k_smallest::key_to_cmp(key)) } + /// Consumes the iterator and return an iterator of the last `n` elements. + /// + /// The iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// ``` + /// use itertools::{assert_equal, Itertools}; + /// + /// let v = vec![5, 9, 8, 4, 2, 12, 0]; + /// assert_equal(v.iter().tail(3), &[2, 12, 0]); + /// assert_equal(v.iter().tail(10), &v); + /// + /// assert_equal(v.iter().tail(1), v.iter().last()); + /// + /// assert_equal((0..100).tail(10), 90..100); + /// ``` + /// + /// For double ended iterators without side-effects, you might prefer + /// `.rev().take(n).rev()` to have a similar result (lazy and non-allocating) + /// without consuming the entire iterator. + #[cfg(feature = "use_alloc")] + fn tail(self, n: usize) -> VecIntoIter + where + Self: Sized, + { + match n { + 0 => { + self.last(); + Vec::new() + } + 1 => self.last().into_iter().collect(), + _ => { + let mut iter = self.fuse(); + let mut data: Vec<_> = iter.by_ref().take(n).collect(); + // Update `data` cyclically. + let idx = iter.fold(0, |i, val| { + data[i] = val; + if i + 1 == n { + 0 + } else { + i + 1 + } + }); + // Respect the insertion order. + data.rotate_left(idx); + data + } + } + .into_iter() + } + /// Collect all iterator elements into one of two /// partitions. Unlike [`Iterator::partition`], each partition may /// have a distinct type. diff --git a/tests/quick.rs b/tests/quick.rs index bb08e21c6..8590303c7 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -1949,4 +1949,9 @@ quickcheck! { result_set.is_empty() } } + + fn tail(v: Vec, n: u8) -> bool { + let n = n as usize; + itertools::equal(v.iter().tail(n), &v[v.len().saturating_sub(n)..]) + } } From 05783544652f299422040874866b7cdbbc48c008 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Fri, 15 Mar 2024 22:27:39 +0100 Subject: [PATCH 2/4] `Itertools::tail`: skip the starting part of the iterator If the iterator is exact sized, then `.collect()` finishes the work. More generally, if the size hint knows enough and `nth` is efficient, this might skip most of the iterator efficiently. In the tests, `.filter(..)` is there to ensure that `tail` can't leverage a precise `size_hint` to entirely skip the iteration after initial `.collect()`. Co-Authored-By: scottmcm --- src/lib.rs | 6 +++++- tests/quick.rs | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 528a1cfa8..c3d41bab3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3159,6 +3159,8 @@ pub trait Itertools: Iterator { /// assert_equal(v.iter().tail(1), v.iter().last()); /// /// assert_equal((0..100).tail(10), 90..100); + /// + /// assert_equal((0..100).filter(|x| x % 3 == 0).tail(10), (72..100).step_by(3)); /// ``` /// /// For double ended iterators without side-effects, you might prefer @@ -3176,7 +3178,9 @@ pub trait Itertools: Iterator { } 1 => self.last().into_iter().collect(), _ => { - let mut iter = self.fuse(); + // Skip the starting part of the iterator if possible. + let (low, _) = self.size_hint(); + let mut iter = self.fuse().skip(low.saturating_sub(n)); let mut data: Vec<_> = iter.by_ref().take(n).collect(); // Update `data` cyclically. let idx = iter.fold(0, |i, val| { diff --git a/tests/quick.rs b/tests/quick.rs index 8590303c7..aa61e3c1e 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -1952,6 +1952,8 @@ quickcheck! { fn tail(v: Vec, n: u8) -> bool { let n = n as usize; - itertools::equal(v.iter().tail(n), &v[v.len().saturating_sub(n)..]) + let result = &v[v.len().saturating_sub(n)..]; + itertools::equal(v.iter().tail(n), result) + && itertools::equal(v.iter().filter(|_| true).tail(n), result) } } From 3373c7a44fc483596b29009040587bc8f2d5de78 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Thu, 14 Mar 2024 19:45:38 +0100 Subject: [PATCH 3/4] `Itertools::tail`: return `VecDeque` rather than `Vec` `rotate_left` is more efficient on `VecDeque` than on a slice. `VecDeque::from(vec)` is O(1) on recent rust. Co-Authored-By: scottmcm --- src/lib.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c3d41bab3..73f841aa6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,7 +55,7 @@ extern crate core as std; extern crate alloc; #[cfg(feature = "use_alloc")] -use alloc::{string::String, vec::Vec}; +use alloc::{collections::VecDeque, string::String, vec::Vec}; pub use either::Either; @@ -72,6 +72,8 @@ use std::fmt::Write; use std::hash::Hash; use std::iter::{once, IntoIterator}; #[cfg(feature = "use_alloc")] +type VecDequeIntoIter = alloc::collections::vec_deque::IntoIter; +#[cfg(feature = "use_alloc")] type VecIntoIter = alloc::vec::IntoIter; use std::iter::FromIterator; @@ -3146,8 +3148,10 @@ pub trait Itertools: Iterator { /// Consumes the iterator and return an iterator of the last `n` elements. /// - /// The iterator, if directly collected to a `Vec`, is converted + /// The iterator, if directly collected to a `VecDeque`, is converted /// without any extra copying or allocation cost. + /// If directly collected to a `Vec`, it may need some data movement + /// but no re-allocation. /// /// ``` /// use itertools::{assert_equal, Itertools}; @@ -3167,20 +3171,22 @@ pub trait Itertools: Iterator { /// `.rev().take(n).rev()` to have a similar result (lazy and non-allocating) /// without consuming the entire iterator. #[cfg(feature = "use_alloc")] - fn tail(self, n: usize) -> VecIntoIter + fn tail(self, n: usize) -> VecDequeIntoIter where Self: Sized, { match n { 0 => { self.last(); - Vec::new() + VecDeque::new() } 1 => self.last().into_iter().collect(), _ => { // Skip the starting part of the iterator if possible. let (low, _) = self.size_hint(); let mut iter = self.fuse().skip(low.saturating_sub(n)); + // TODO: If VecDeque has a more efficient method than + // `.pop_front();.push_back(val)` in the future then maybe revisit this. let mut data: Vec<_> = iter.by_ref().take(n).collect(); // Update `data` cyclically. let idx = iter.fold(0, |i, val| { @@ -3191,7 +3197,8 @@ pub trait Itertools: Iterator { i + 1 } }); - // Respect the insertion order. + // Respect the insertion order, efficiently. + let mut data = VecDeque::from(data); data.rotate_left(idx); data } From b51f26a34f018494c249ebf79daf9225ba619677 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Sun, 17 Mar 2024 09:37:44 +0100 Subject: [PATCH 4/4] Check two invariants --- src/k_smallest.rs | 1 + src/lib.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/k_smallest.rs b/src/k_smallest.rs index fe699fbd4..b909887f5 100644 --- a/src/k_smallest.rs +++ b/src/k_smallest.rs @@ -57,6 +57,7 @@ where } iter.for_each(|val| { + debug_assert_eq!(storage.len(), k); if is_less_than(&val, &storage[0]) { // Treating this as an push-and-pop saves having to write a sift-up implementation. // https://en.wikipedia.org/wiki/Binary_heap#Insert_then_extract diff --git a/src/lib.rs b/src/lib.rs index 73f841aa6..a297a6d80 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3190,6 +3190,7 @@ pub trait Itertools: Iterator { let mut data: Vec<_> = iter.by_ref().take(n).collect(); // Update `data` cyclically. let idx = iter.fold(0, |i, val| { + debug_assert_eq!(data.len(), n); data[i] = val; if i + 1 == n { 0