Skip to content

[libc++] Optimize ranges::copy for random_access_iterator inputs and vector<bool> iterator outputs #120134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions libcxx/docs/ReleaseNotes/21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ Improvements and New Features
- The ``flat_map::insert`` and ``flat_set::insert_range`` have been optimized, resulting in a performance improvement of up
to 10x for inserting elements into a ``flat_map`` when the input range is a ``flat_map`` or a ``zip_view``.

- The ``std::copy`` and ``std::ranges::copy`` algorithms have been optimized for copying from ``random_access_iterator``\ s
to ``std::vector<bool>::iterator``, resulting in a performance improvement of up to 3x. As a result, range-based
operations on ``std::vector<bool>``, including construction, ``assign_range``, ``insert_range``, ``append_range``, and
their iterator-pair counterparts, also benefit from a similar speedup of up to 3x.

Deprecations and Removals
-------------------------

Expand Down
64 changes: 63 additions & 1 deletion libcxx/include/__algorithm/copy.h
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not attached to the file: Let's add a release note for this!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <__memory/pointer_traits.h>
#include <__type_traits/common_type.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_convertible.h>
#include <__utility/move.h>
#include <__utility/pair.h>

Expand Down Expand Up @@ -221,12 +222,73 @@ struct __copy_impl {
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
operator()(__bit_iterator<_Cp, _IsConst> __first,
__bit_iterator<_Cp, _IsConst> __last,
__bit_iterator<_Cp, false> __result) const {
__bit_iterator<_Cp, /* IsConst = */ false> __result) const {
if (__first.__ctz_ == __result.__ctz_)
return std::make_pair(__last, std::__copy_aligned(__first, __last, __result));
return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result));
}

// Copies the range [__first, __last) into the bit sequence starting at `__result`, assembling each destination
// storage word in a local variable and writing it back once instead of assigning bit by bit.
//
// This overload participates only when `_InIter` is not a segmented iterator, is random-access (either through its
// iterator category or through an `iterator_concept` convertible to `random_access_iterator_tag`), and its
// `value_type` is convertible to `bool`. Random access is what makes `__last - __first` below available.
//
// Returns a pair holding `__first` advanced once per copied element and `__result` advanced to the position just past
// the last bit written.
template < class _InIter,
class _Cp,
__enable_if_t<!__is_segmented_iterator<_InIter>::value &&
(__has_random_access_iterator_category<_InIter>::value ||
__has_iterator_concept_convertible_to<_InIter, random_access_iterator_tag>::value) &&
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, __bit_iterator<_Cp, false> >
operator()(_InIter __first, _InIter __last, __bit_iterator<_Cp, /* IsConst = */ false> __result) const {
using _It = __bit_iterator<_Cp, false>;
using __storage_type = typename _It::__storage_type;
const unsigned __bits_per_word = _It::__bits_per_word;
// Number of elements (and therefore destination bits) still to be copied.
__storage_type __n = static_cast<__storage_type>(__last - __first);

// Nothing to do for the leading partial word when the input range is empty.
if (__first != __last) {
// do first partial word, if present
// A non-zero __ctz_ means the destination starts in the middle of a storage word.
if (__result.__ctz_ != 0) {
// __clz: number of bit positions from __ctz_ up to the end of the current word.
__storage_type __clz = static_cast<__storage_type>(__bits_per_word - __result.__ctz_);
// __dn: how many of those positions this copy actually fills.
__storage_type __dn = std::min(__clz, __n);
__storage_type __w = *__result.__seg_;
// NOTE(review): __middle_mask is defined elsewhere; presumably it selects the __dn bits starting at __ctz_
// (leaving __clz - __dn high bits and __ctz_ low bits unselected) -- confirm against its definition.
__storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __result.__ctz_);
// Clear the masked bits, then set the ones whose source element is truthy; other bits of the word are kept.
__w &= ~__m;
for (__storage_type __i = 0; __i < __dn; ++__i, ++__first) {
if (*__first)
__w |= static_cast<__storage_type>(1) << (__result.__ctz_ + __i);
}
__result.__ctz_ += __dn;
*__result.__seg_ = __w;
// If the word was filled to its end, step to the beginning of the next storage word.
if (__result.__ctz_ == __bits_per_word) {
__result.__ctz_ = 0;
++__result.__seg_;
}
__n -= __dn;
}
}
// If the partial word above was not filled to its end, then __dn == __n and __n is now 0, so both steps below are
// no-ops. Otherwise __result.__ctz_ is 0 here and the remaining bits start at a word boundary.
// do middle whole words, if present
__storage_type __nw = __n / __bits_per_word;
__n -= __nw * __bits_per_word;
for (; __nw; --__nw) {
// Every bit of the word is overwritten, so it is built from zero without reading the destination.
__storage_type __w = 0;
for (__storage_type __i = 0; __i < __bits_per_word; ++__i, ++__first) {
if (*__first)
__w |= static_cast<__storage_type>(1) << __i;
}
*__result.__seg_++ = __w;
}
// do last partial word, if present
if (__n) {
// Collect the remaining __n bits (fewer than a full word) starting at bit 0.
__storage_type __w = 0;
for (__storage_type __i = 0; __i < __n; ++__i, ++__first) {
if (*__first)
__w |= static_cast<__storage_type>(1) << __i;
}
// NOTE(review): __trailing_mask is defined elsewhere; presumably it selects the low __n bits of the word --
// confirm against its definition.
__storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
// Replace only the masked bits of the destination word; the bits outside the mask are left as they were.
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __w;
// The output position now sits __n bits into this word.
__result.__ctz_ = __n;
}
return std::make_pair(std::move(__first), std::move(__result));
}

// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
Expand Down
5 changes: 1 addition & 4 deletions libcxx/include/__bit_reference
Original file line number Diff line number Diff line change
Expand Up @@ -477,10 +477,7 @@ private:
template <class _Dp, bool _IC>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_unaligned(
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
template <class _Dp, bool _IC>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> >
__copy_impl::operator()(
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result) const;
friend struct __copy_impl;
template <class _Dp, bool _IC>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned(
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
Expand Down
22 changes: 18 additions & 4 deletions libcxx/test/benchmarks/algorithms/modifying/copy.bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,23 @@

#include "benchmark/benchmark.h"
#include "../../GenerateInput.h"
#include "test_iterators.h"
#include "test_macros.h"

int main(int argc, char** argv) {
auto std_copy = [](auto first, auto last, auto out) { return std::copy(first, last, out); };

// {std,ranges}::copy(normal container)
{
auto bm = []<class Container>(std::string name, auto copy) {
auto bm = []<class ContainerIn, class ContainerOut = std::vector<typename ContainerIn::value_type>>(
std::string name, auto copy) {
benchmark::RegisterBenchmark(name, [copy](auto& st) {
std::size_t const n = st.range(0);
using ValueType = typename Container::value_type;
Container c;
using ValueType = typename ContainerIn::value_type;
ContainerIn c;
std::generate_n(std::back_inserter(c), n, [] { return Generate<ValueType>::random(); });

std::vector<ValueType> out(n);
ContainerOut out(n);

for ([[maybe_unused]] auto _ : st) {
benchmark::DoNotOptimize(c);
Expand All @@ -42,12 +44,24 @@ int main(int argc, char** argv) {
}
})->Range(8, 1 << 20);
};
// Copy from normal containers to vector<int>
bm.operator()<std::vector<int>>("std::copy(vector<int>)", std_copy);
bm.operator()<std::deque<int>>("std::copy(deque<int>)", std_copy);
bm.operator()<std::list<int>>("std::copy(list<int>)", std_copy);
bm.operator()<std::vector<int>>("rng::copy(vector<int>)", std::ranges::copy);
bm.operator()<std::deque<int>>("rng::copy(deque<int>)", std::ranges::copy);
bm.operator()<std::list<int>>("rng::copy(list<int>)", std::ranges::copy);

// Copy from normal containers to vector<bool>
// Note: vector<bool>::iterator is not an output_iterator before C++23
#if TEST_STD_VER >= 23
bm.operator()<std::vector<int>, std::vector<bool>>("std::copy(vector<int>, std::vector<bool>)", std_copy);
bm.operator()<std::deque<int>, std::vector<bool>>("std::copy(deque<int>, std::vector<bool>)", std_copy);
bm.operator()<std::list<int>, std::vector<bool>>("std::copy(list<int>, std::vector<bool>)", std_copy);
bm.operator()<std::vector<int>, std::vector<bool>>("rng::copy(vector<int>, std::vector<bool>)", std::ranges::copy);
bm.operator()<std::deque<int>, std::vector<bool>>("rng::copy(deque<int>, std::vector<bool>)", std::ranges::copy);
bm.operator()<std::list<int>, std::vector<bool>>("rng::copy(list<int>, std::vector<bool>)", std::ranges::copy);
#endif
}

// {std,ranges}::copy(vector<bool>)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "benchmark/benchmark.h"
#include "test_iterators.h"
#include "../../GenerateInput.h"
#include "../../../std/containers/from_range_helpers.h"

namespace support {

Expand Down Expand Up @@ -172,8 +173,8 @@ void sequence_container_benchmarks(std::string container) {
bool toggle = false;
for ([[maybe_unused]] auto _ : st) {
std::vector<ValueType>& in = toggle ? in1 : in2;
auto first = in.data();
auto last = in.data() + in.size();
auto first = in.begin();
auto last = in.end();
c.assign(cpp17_input_iterator(first), cpp17_input_iterator(last));
toggle = !toggle;
DoNotOptimizeData(c);
Expand Down Expand Up @@ -237,8 +238,8 @@ void sequence_container_benchmarks(std::string container) {
std::vector<ValueType> in;
std::generate_n(std::back_inserter(in), size, gen);
DoNotOptimizeData(in);
auto first = in.data();
auto last = in.data() + in.size();
auto first = in.begin();
auto last = in.end();

const int small = 100; // arbitrary
Container c;
Expand All @@ -264,8 +265,8 @@ void sequence_container_benchmarks(std::string container) {
std::vector<ValueType> in;
std::generate_n(std::back_inserter(in), size, gen);
DoNotOptimizeData(in);
auto first = in.data();
auto last = in.data() + in.size();
auto first = in.begin();
auto last = in.end();

const int overflow = size / 10; // 10% of elements won't fit in the vector when we insert
Container c;
Expand All @@ -290,8 +291,8 @@ void sequence_container_benchmarks(std::string container) {
std::vector<ValueType> in;
std::generate_n(std::back_inserter(in), size, gen);
DoNotOptimizeData(in);
auto first = in.data();
auto last = in.data() + in.size();
auto first = in.begin();
auto last = in.end();

auto const overflow = 9 * (size / 10); // 90% of elements won't fit in the vector when we insert
Container c;
Expand Down Expand Up @@ -448,6 +449,173 @@ void sequence_container_benchmarks(std::string container) {
}
});
}

////////////////////////////////////////////////////////////////////////////////////////////////
// Additional benchmarks for vector<bool> iterator-pair and range-based operations
////////////////////////////////////////////////////////////////////////////////////////////////

static constexpr bool is_vector_bool = requires {
typename Container::allocator_type;
} && std::same_as<std::remove_cvref_t<Container>, std::vector<bool, typename Container::allocator_type>>;

if constexpr (is_vector_bool) {
auto bench_vb = [&](std::string operation, auto f) {
benchmark::RegisterBenchmark(container + "::" + operation, f)->Arg(1024)->Arg(1 << 16)->Arg(1 << 20);
};

{ // iterator-pair ctor
auto bm = [&generators, &bench_vb, &tostr]<template <class> class Iter>(std::string iter) {
for (auto gen : generators)
bench_vb("ctor(" + iter + ", " + iter + ")" + tostr(gen), [gen](auto& st) {
auto const size = st.range(0);
std::vector<int> in;
std::generate_n(std::back_inserter(in), size, gen);
benchmark::DoNotOptimize(in);
const auto begin = Iter(in.begin());
const auto end = Iter(in.end());
benchmark::DoNotOptimize(in);

for ([[maybe_unused]] auto _ : st) {
Container c(begin, end); // we assume the destructor doesn't dominate the benchmark
DoNotOptimizeData(c);
}
});
};
bm.template operator()<random_access_iterator>("ra_iter");
}
{ // iterator-pair assignment
auto bm = [&generators, &bench_vb, &tostr]<template <class> class Iter>(std::string iter) {
for (auto gen : generators)
bench_vb("assign(" + iter + ", " + iter + ")" + tostr(gen), [gen](auto& st) {
auto const size = st.range(0);
std::vector<int> in1, in2;
std::generate_n(std::back_inserter(in1), size, gen);
std::generate_n(std::back_inserter(in2), size, gen);
DoNotOptimizeData(in1);
DoNotOptimizeData(in2);

Container c(in1.begin(), in1.end());
bool toggle = true;
for ([[maybe_unused]] auto _ : st) {
auto& in = toggle ? in2 : in1;
c.assign(Iter(in.begin()), Iter(in.end()));
toggle = !toggle;
DoNotOptimizeData(c);
}
});
};
bm.template operator()<random_access_iterator>("ra_iter");
}
{ // Iterator-pair insertion
auto bm = [&generators, &bench_vb, &tostr]<template <class> class Iter>(std::string iter) {
for (auto gen : generators)
bench_vb("insert(begin, " + iter + ", " + iter + ")" + tostr(gen), [gen](auto& st) {
auto const size = st.range(0);
std::vector<int> in;
Container c;
std::generate_n(std::back_inserter(in), size, gen);
std::generate_n(std::back_inserter(c), size, gen);
DoNotOptimizeData(in);
DoNotOptimizeData(c);

for ([[maybe_unused]] auto _ : st) {
c.insert(c.begin(), Iter(in.begin()), Iter(in.end()));
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
DoNotOptimizeData(c);
}
});
};
bm.template operator()<random_access_iterator>("ra_iter");
}

#if defined(__cpp_lib_containers_ranges) && __cpp_lib_containers_ranges >= 202202L
{ // Range-ctor
auto bm = [&generators, &bench_vb, &tostr]<template <class> class Iter>(std::string range) {
for (auto gen : generators)
bench_vb("ctor(" + range + ")" + tostr(gen), [gen](auto& st) {
auto const size = st.range(0);
std::vector<int> in;
std::generate_n(std::back_inserter(in), size, gen);
std::ranges::subrange rg(Iter(std::ranges::begin(in)), Iter(std::ranges::end(in)));
benchmark::DoNotOptimize(in);

for ([[maybe_unused]] auto _ : st) {
Container c(std::from_range, rg); // we assume the destructor doesn't dominate the benchmark
DoNotOptimizeData(c);
}
});
};
bm.template operator()<cpp20_random_access_iterator>("ra_range");
}
{ // Range-assignment
auto bm = [&generators, &bench_vb, &tostr]<template <class> class Iter>(std::string range) {
for (auto gen : generators)
bench_vb("assign_range(" + range + ")" + tostr(gen), [gen](auto& st) {
auto const size = st.range(0);
std::vector<int> in1, in2;
std::generate_n(std::back_inserter(in1), size, gen);
std::generate_n(std::back_inserter(in2), size, gen);
std::ranges::subrange rg1(Iter(std::ranges::begin(in1)), Iter(std::ranges::end(in1)));
std::ranges::subrange rg2(Iter(std::ranges::begin(in2)), Iter(std::ranges::end(in2)));
DoNotOptimizeData(in1);
DoNotOptimizeData(in2);

Container c(std::from_range, rg1);
bool toggle = true;
for ([[maybe_unused]] auto _ : st) {
auto& in = toggle ? rg2 : rg1;
c.assign_range(in);
toggle = !toggle;
DoNotOptimizeData(c);
}
});
};
bm.template operator()<cpp20_random_access_iterator>("ra_range");
}
{ // Range-insertion
auto bm = [&generators, &bench_vb, &tostr]<template <class> class Iter>(std::string range) {
for (auto gen : generators)
bench_vb("insert_range(" + range + ")" + tostr(gen), [gen](auto& st) {
auto const size = st.range(0);
std::vector<int> in;
Container c;
std::generate_n(std::back_inserter(in), size, gen);
std::generate_n(std::back_inserter(c), size, gen);
std::ranges::subrange rg(Iter(std::ranges::begin(in)), Iter(std::ranges::end(in)));
DoNotOptimizeData(in);
DoNotOptimizeData(c);

for ([[maybe_unused]] auto _ : st) {
c.insert_range(c.begin(), rg);
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
DoNotOptimizeData(c);
}
});
};
bm.template operator()<cpp20_random_access_iterator>("ra_range");
}
{ // Range-append
auto bm = [&generators, &bench_vb, &tostr]<template <class> class Iter>(std::string range) {
for (auto gen : generators)
bench_vb("append_range(" + range + ")" + tostr(gen), [gen](auto& st) {
auto const size = st.range(0);
std::vector<int> in;
std::generate_n(std::back_inserter(in), size, gen);
std::ranges::subrange rg(Iter(std::ranges::begin(in)), Iter(std::ranges::end(in)));
DoNotOptimizeData(in);

Container c;
for ([[maybe_unused]] auto _ : st) {
c.append_range(rg);
c.erase(c.begin(), c.end()); // avoid growing indefinitely
DoNotOptimizeData(c);
}
});
};
bm.template operator()<cpp20_random_access_iterator>("ra_range");
}
#endif
}
}

} // namespace support
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
int main(int argc, char** argv) {
support::sequence_container_benchmarks<std::vector<int>>("std::vector<int>");
support::sequence_container_benchmarks<std::vector<std::string>>("std::vector<std::string>");
support::sequence_container_benchmarks<std::vector<bool>>("std::vector<bool>");

benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
Expand Down
Loading
Loading