Skip to content

[libc++] Optimize ranges::copy for forward_iterator and segmented_iterator #120134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 88 additions & 1 deletion libcxx/include/__algorithm/copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
#include <__algorithm/min.h>
#include <__config>
#include <__fwd/bit_reference.h>
#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
#include <__memory/pointer_traits.h>
#include <__type_traits/common_type.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_convertible.h>
#include <__utility/move.h>
#include <__utility/pair.h>

Expand Down Expand Up @@ -154,6 +156,25 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
return __result;
}

// Returns the number of increments needed to move `__first` up to `__last`.
// This overload participates only for iterators that report an input iterator category but not a
// random-access one, so the distance is obtained by walking the range one step at a time.
template <class _InIter,
          class _Sent,
          __enable_if_t<__has_input_iterator_category<_InIter>::value &&
                            !__has_random_access_iterator_category<_InIter>::value,
                        int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InIter>::difference_type
__iter_sent_distance(_InIter __first, _Sent __last) {
  typedef typename iterator_traits<_InIter>::difference_type _Diff;
  _Diff __steps(0);
  while (__first != __last) {
    ++__steps;
    ++__first;
  }
  return __steps;
}

// Random-access overload: the distance is obtained directly by subtracting `__first` from `__last`
// and converting the result to the iterator's difference_type.
template <class _InIter, class _Sent, __enable_if_t<__has_random_access_iterator_category<_InIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InIter>::difference_type
__iter_sent_distance(_InIter __first, _Sent __last) {
  typedef typename iterator_traits<_InIter>::difference_type _Diff;
  return static_cast<_Diff>(__last - __first);
}

struct __copy_impl {
template <class _InIter, class _Sent, class _OutIter>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
Expand Down Expand Up @@ -221,12 +242,78 @@ struct __copy_impl {
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
operator()(__bit_iterator<_Cp, _IsConst> __first,
__bit_iterator<_Cp, _IsConst> __last,
__bit_iterator<_Cp, false> __result) const {
__bit_iterator<_Cp, /* IsConst = */ false> __result) const {
if (__first.__ctz_ == __result.__ctz_)
return std::make_pair(__last, std::__copy_aligned(__first, __last, __result));
return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result));
}

// Copies a range delimited by segmented iterators into a non-const __bit_iterator destination by
// handing the range to std::__for_each_segment with a _CopySegment functor built from `__result`.
// NOTE(review): _CopySegment presumably keeps a reference to `__result` and advances it as each
// segment is copied, which is why `__result` is returned afterwards -- confirm against its definition.
template <class _InIter, class _Cp, __enable_if_t<__is_segmented_iterator<_InIter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, __bit_iterator<_Cp, false> >
operator()(_InIter __first, _InIter __last, __bit_iterator<_Cp, /* IsConst = */ false> __result) const {
  typedef __bit_iterator<_Cp, false> _BitOut;
  typedef _CopySegment<_InIter, _BitOut> _SegmentCopier;
  std::__for_each_segment(__first, __last, _SegmentCopier(__result));
  return std::make_pair(__last, std::move(__result));
}

// Copies [__first, __last) from a non-segmented forward iterator whose value_type is convertible to
// bool into packed bit storage addressed by a non-const __bit_iterator.
//
// Instead of assigning through a bit reference per element, the destination is written one storage
// word at a time in three phases:
//   1. finish the partially filled word `__result` currently points into (when __ctz_ != 0),
//   2. assemble and store every whole word,
//   3. merge the remaining bits into the final, partially written word.
//
// Returns the advanced input iterator together with the bit iterator one past the last bit written.
template <class _InIter,
          class _Sent,
          class _Cp,
          __enable_if_t<!__is_segmented_iterator<_InIter>::value &&
                            (__has_forward_iterator_category<_InIter>::value ||
                             __has_iterator_concept_convertible_to<_InIter, forward_iterator_tag>::value) &&
                            is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
                        int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, __bit_iterator<_Cp, false> >
operator()(_InIter __first, _Sent __last, __bit_iterator<_Cp, /* IsConst = */ false> __result) const {
  using _It            = __bit_iterator<_Cp, false>;
  using __storage_type = typename _It::__storage_type;
  // The input is a forward range, so measuring it up front does not consume it.
#if _LIBCPP_STD_VER >= 20
  __storage_type __n = static_cast<__storage_type>(std::ranges::distance(__first, __last));
#else
  __storage_type __n = static_cast<__storage_type>(std::__iter_sent_distance(__first, __last));
#endif
  const unsigned __bits_per_word = _It::__bits_per_word;

  if (__first != __last) {
    // do first partial word, if present
    if (__result.__ctz_ != 0) {
      // __clz: bits still free in the current word; __dn: how many of those this copy fills.
      __storage_type __clz = static_cast<__storage_type>(__bits_per_word - __result.__ctz_);
      __storage_type __dn  = std::min(__clz, __n);
      __storage_type __w   = *__result.__seg_;
      // NOTE(review): __middle_mask presumably selects the __dn bits starting at __ctz_ -- confirm.
      __storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __result.__ctz_);
      __w &= ~__m;
      // Each element is normalised through bool before widening: the overload only requires the
      // value type to be convertible to bool, so a source value such as 2 or -1 must contribute
      // exactly one set bit rather than its raw integer bit pattern.
      for (__storage_type __i = 0; __i < __dn; ++__i, ++__first)
        __w |= static_cast<__storage_type>(static_cast<bool>(*__first)) << __result.__ctz_++;
      *__result.__seg_ = __w;
      if (__result.__ctz_ == __bits_per_word) {
        __result.__ctz_ = 0;
        ++__result.__seg_;
      }
      __n -= __dn;
    }
  }
  // do middle whole words, if present
  __storage_type __nw = __n / __bits_per_word;
  __n -= __nw * __bits_per_word;
  for (; __nw; --__nw) {
    __storage_type __w = 0;
    for (__storage_type __i = 0; __i < __bits_per_word; ++__i, ++__first)
      __w |= static_cast<__storage_type>(static_cast<bool>(*__first)) << __i;
    *__result.__seg_++ = __w;
  }
  // do last partial word, if present
  if (__n) {
    __storage_type __w = 0;
    for (__storage_type __i = 0; __i < __n; ++__i, ++__first)
      __w |= static_cast<__storage_type>(static_cast<bool>(*__first)) << __i;
    // NOTE(review): __trailing_mask presumably selects the low __n bits of the word -- confirm.
    __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
    // Clear only the bits being overwritten so the untouched bits of the word are preserved.
    *__result.__seg_ &= ~__m;
    *__result.__seg_ |= __w;
    __result.__ctz_ = __n;
  }
  return std::make_pair(std::move(__first), std::move(__result));
}

// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
Expand Down
13 changes: 13 additions & 0 deletions libcxx/include/__bit_reference
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@
#include <__functional/identity.h>
#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
#include <__memory/construct_at.h>
#include <__memory/pointer_traits.h>
#include <__type_traits/conditional.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_convertible.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_unsigned.h>
#include <__type_traits/void_t.h>
Expand Down Expand Up @@ -463,6 +465,17 @@ private:
// Every specialization of __bit_array is a friend of this class.
template <class _Dp>
friend struct __bit_array;

// Befriends the __copy_impl::operator() overload that takes an iterator/sentinel pair and a
// non-const __bit_iterator destination, restricted by the enable_if condition spelled out below
// (non-segmented forward iterators whose value_type is convertible to bool).
// NOTE(review): this template header presumably has to stay in sync with the definition of that
// overload in <__algorithm/copy.h> for the friendship to apply -- confirm when either changes.
template <class _InIter,
class _Sent,
class _Dp,
__enable_if_t<!__is_segmented_iterator<_InIter>::value &&
(__has_forward_iterator_category<_InIter>::value ||
__has_iterator_concept_convertible_to<_InIter, forward_iterator_tag>::value) &&
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
int> >
_LIBCPP_CONSTEXPR_SINCE_CXX14 friend pair<_InIter, __bit_iterator<_Dp, false> >
__copy_impl::operator()(_InIter __first, _Sent __last, __bit_iterator<_Dp, false> __result) const;

// Befriends __fill_n_bool for both values of _FillVal, operating on a non-const __bit_iterator.
template <bool _FillVal, class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend void
__fill_n_bool(__bit_iterator<_Dp, false> __first, typename __size_difference_type_traits<_Dp>::size_type __n);
Expand Down
1 change: 0 additions & 1 deletion libcxx/include/__vector/vector_bool.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
auto __n = static_cast<size_type>(ranges::distance(__range));
__init_with_size(ranges::begin(__range), ranges::end(__range), __n);

} else {
__init_with_sentinel(ranges::begin(__range), ranges::end(__range));
}
Expand Down
47 changes: 43 additions & 4 deletions libcxx/test/benchmarks/algorithms/modifying/copy.bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,23 @@

#include "benchmark/benchmark.h"
#include "../../GenerateInput.h"
#include "test_iterators.h"
#include "test_macros.h"

int main(int argc, char** argv) {
auto std_copy = [](auto first, auto last, auto out) { return std::copy(first, last, out); };

// {std,ranges}::copy(normal container)
{
auto bm = []<class Container>(std::string name, auto copy) {
auto bm = []<class ContainerIn, class ContainerOut = std::vector<typename ContainerIn::value_type>>(
std::string name, auto copy) {
benchmark::RegisterBenchmark(name, [copy](auto& st) {
std::size_t const n = st.range(0);
using ValueType = typename Container::value_type;
Container c;
using ValueType = typename ContainerIn::value_type;
ContainerIn c;
std::generate_n(std::back_inserter(c), n, [] { return Generate<ValueType>::random(); });

std::vector<ValueType> out(n);
ContainerOut out(n);

for ([[maybe_unused]] auto _ : st) {
benchmark::DoNotOptimize(c);
Expand All @@ -42,12 +44,23 @@ int main(int argc, char** argv) {
}
})->Range(8, 1 << 20);
};
// Copy from normal containers to vector<int>
bm.operator()<std::vector<int>>("std::copy(vector<int>)", std_copy);
bm.operator()<std::deque<int>>("std::copy(deque<int>)", std_copy);
bm.operator()<std::list<int>>("std::copy(list<int>)", std_copy);
bm.operator()<std::vector<int>>("rng::copy(vector<int>)", std::ranges::copy);
bm.operator()<std::deque<int>>("rng::copy(deque<int>)", std::ranges::copy);
bm.operator()<std::list<int>>("rng::copy(list<int>)", std::ranges::copy);

#if TEST_STD_VER >= 23 // vector<bool>::iterator is not an output_iterator before C++23
// Copy from normal containers to vector<bool>
bm.operator()<std::vector<int>, std::vector<bool>>("std::copy(vector<int>, std::vector<bool>)", std_copy);
bm.operator()<std::deque<int>, std::vector<bool>>("std::copy(deque<int>, std::vector<bool>)", std_copy);
bm.operator()<std::list<int>, std::vector<bool>>("std::copy(list<int>, std::vector<bool>)", std_copy);
bm.operator()<std::vector<int>, std::vector<bool>>("rng::copy(vector<int>, std::vector<bool>)", std::ranges::copy);
bm.operator()<std::deque<int>, std::vector<bool>>("rng::copy(deque<int>, std::vector<bool>)", std::ranges::copy);
bm.operator()<std::list<int>, std::vector<bool>>("rng::copy(list<int>, std::vector<bool>)", std::ranges::copy);
#endif
}

// {std,ranges}::copy(vector<bool>)
Expand Down Expand Up @@ -76,6 +89,32 @@ int main(int argc, char** argv) {
#endif
}

// {std,ranges}::copy(forward_iterator, forward_iterator, vector<bool>)
{
  // Registers one benchmark that copies a vector<int> of ones, viewed through the iterator wrapper
  // `Iter`, into a vector<bool> of the same size using the supplied `copy` callable.
  auto register_bm = []<template <class> class Iter>(std::string name, auto copy) {
    auto body = [copy](auto& st) {
      std::size_t const count = st.range(0);
      std::vector<int> source(count, 1);
      std::vector<bool> dest(count);
      auto src_first  = Iter(source.begin());
      auto src_last   = Iter(source.end());
      auto dest_first = dest.begin();
      for ([[maybe_unused]] auto _ : st) {
        benchmark::DoNotOptimize(source);
        benchmark::DoNotOptimize(dest);
        auto copied = copy(src_first, src_last, dest_first);
        benchmark::DoNotOptimize(copied);
      }
    };
    benchmark::RegisterBenchmark(name, body)->Range(64, 1 << 20);
  };
  register_bm.operator()<forward_iterator>("std::copy(forward_iterator, vector<bool>)", std_copy);
  register_bm.operator()<random_access_iterator>("std::copy(random_access_iterator, vector<bool>)", std_copy);
#if TEST_STD_VER >= 23 // vector<bool>::iterator is not an output_iterator before C++23
  register_bm.operator()<forward_iterator>("rng::copy(forward_iterator, vector<bool>)", std::ranges::copy);
  register_bm.operator()<random_access_iterator>("rng::copy(random_access_iterator, vector<bool>)", std::ranges::copy);
#endif
}

benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
Expand Down
Loading