Skip to content

Commit a4f4910

Browse files
committed
Speed-up range operations in vector<bool>
1 parent 4ea44eb commit a4f4910

File tree

9 files changed

+403
-52
lines changed

9 files changed

+403
-52
lines changed

libcxx/include/__algorithm/copy.h

+80
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,13 @@
1313
#include <__algorithm/for_each_segment.h>
1414
#include <__algorithm/min.h>
1515
#include <__config>
16+
#include <__fwd/bit_reference.h>
17+
#include <__iterator/distance.h>
1618
#include <__iterator/iterator_traits.h>
1719
#include <__iterator/segmented_iterator.h>
1820
#include <__type_traits/common_type.h>
1921
#include <__type_traits/enable_if.h>
22+
#include <__type_traits/is_convertible.h>
2023
#include <__utility/move.h>
2124
#include <__utility/pair.h>
2225

@@ -32,6 +35,25 @@ _LIBCPP_BEGIN_NAMESPACE_STD
3235
template <class _InIter, class _Sent, class _OutIter>
3336
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
3437

38+
template <class _InIter,
39+
class _Sent,
40+
__enable_if_t<__has_input_iterator_category<_InIter>::value &&
41+
!__has_random_access_iterator_category<_InIter>::value,
42+
int> = 0>
43+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InIter>::difference_type
44+
__iter_sent_distance(_InIter __first, _Sent __last) {
45+
typename iterator_traits<_InIter>::difference_type __r(0);
46+
for (; __first != __last; ++__first)
47+
++__r;
48+
return __r;
49+
}
50+
51+
template <class _InIter, class _Sent, __enable_if_t<__has_random_access_iterator_category<_InIter>::value, int> = 0>
52+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InIter>::difference_type
53+
__iter_sent_distance(_InIter __first, _Sent __last) {
54+
return static_cast<typename iterator_traits<_InIter>::difference_type>(__last - __first);
55+
}
56+
3557
struct __copy_impl {
3658
template <class _InIter, class _Sent, class _OutIter>
3759
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
@@ -95,6 +117,64 @@ struct __copy_impl {
95117
}
96118
}
97119

120+
template <class _InIter,
121+
class _Sent,
122+
class _Cp,
123+
__enable_if_t<(__has_forward_iterator_category<_InIter>::value ||
124+
__has_iterator_concept_convertible_to<_InIter, forward_iterator_tag>::value) &&
125+
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
126+
int> = 0>
127+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, __bit_iterator<_Cp, false> >
128+
operator()(_InIter __first, _Sent __last, __bit_iterator<_Cp, false> __result) const {
129+
using _It = __bit_iterator<_Cp, false>;
130+
using __storage_type = typename _It::__storage_type;
131+
#if _LIBCPP_STD_VER >= 20
132+
__storage_type __n = static_cast<__storage_type>(std::ranges::distance(__first, __last));
133+
#else
134+
__storage_type __n = static_cast<__storage_type>(std::__iter_sent_distance(__first, __last));
135+
#endif
136+
const unsigned __bits_per_word = _It::__bits_per_word;
137+
138+
if (__first != __last) {
139+
// do first partial word, if present
140+
if (__result.__ctz_ != 0) {
141+
__storage_type __clz = static_cast<__storage_type>(__bits_per_word - __result.__ctz_);
142+
__storage_type __dn = std::min(__clz, __n);
143+
__storage_type __w = *__result.__seg_;
144+
__storage_type __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
145+
__w &= ~__m;
146+
for (__storage_type __i = 0; __i < __dn; ++__i, ++__first)
147+
__w |= static_cast<__storage_type>(*__first) << __result.__ctz_++;
148+
*__result.__seg_ = __w;
149+
if (__result.__ctz_ == __bits_per_word) {
150+
__result.__ctz_ = 0;
151+
++__result.__seg_;
152+
}
153+
__n -= __dn;
154+
}
155+
}
156+
// do middle whole words, if present
157+
__storage_type __nw = __n / __bits_per_word;
158+
__n -= __nw * __bits_per_word;
159+
for (; __nw; --__nw) {
160+
__storage_type __w = 0;
161+
for (__storage_type __i = 0; __i < __bits_per_word; ++__i, ++__first)
162+
__w |= static_cast<__storage_type>(*__first) << __i;
163+
*__result.__seg_++ = __w;
164+
}
165+
// do last partial word, if present
166+
if (__n) {
167+
__storage_type __w = 0;
168+
for (__storage_type __i = 0; __i < __n; ++__i, ++__first)
169+
__w |= static_cast<__storage_type>(*__first) << __i;
170+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
171+
*__result.__seg_ &= ~__m;
172+
*__result.__seg_ |= __w;
173+
__result.__ctz_ = __n;
174+
}
175+
return std::make_pair(std::move(__first), std::move(__result));
176+
}
177+
98178
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
99179
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
100180
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>

libcxx/include/__bit_reference

+14
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#ifndef _LIBCPP___BIT_REFERENCE
1111
#define _LIBCPP___BIT_REFERENCE
1212

13+
#include <__algorithm/copy.h>
1314
#include <__algorithm/copy_n.h>
1415
#include <__algorithm/min.h>
1516
#include <__bit/countr.h>
@@ -22,8 +23,11 @@
2223
#include <__memory/construct_at.h>
2324
#include <__memory/pointer_traits.h>
2425
#include <__type_traits/conditional.h>
26+
#include <__type_traits/enable_if.h>
2527
#include <__type_traits/is_constant_evaluated.h>
28+
#include <__type_traits/is_convertible.h>
2629
#include <__type_traits/void_t.h>
30+
#include <__utility/pair.h>
2731
#include <__utility/swap.h>
2832

2933
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -978,6 +982,16 @@ private:
978982
template <class _Dp>
979983
friend struct __bit_array;
980984

985+
template <class _InIter,
986+
class _Sent,
987+
class _Dp,
988+
__enable_if_t<(__has_forward_iterator_category<_InIter>::value ||
989+
__has_iterator_concept_convertible_to<_InIter, forward_iterator_tag>::value) &&
990+
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
991+
int> >
992+
_LIBCPP_CONSTEXPR_SINCE_CXX14 friend pair<_InIter, __bit_iterator<_Dp, false> >
993+
__copy_impl::operator()(_InIter __first, _Sent __last, __bit_iterator<_Dp, false> __result) const;
994+
981995
template <bool _FillVal, class _Dp>
982996
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend void
983997
__fill_n_bool(__bit_iterator<_Dp, false> __first, typename __size_difference_type_traits<_Dp>::size_type __n);

libcxx/include/__vector/vector_bool.h

-1
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,6 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
171171
if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
172172
auto __n = static_cast<size_type>(ranges::distance(__range));
173173
__init_with_size(ranges::begin(__range), ranges::end(__range), __n);
174-
175174
} else {
176175
__init_with_sentinel(ranges::begin(__range), ranges::end(__range));
177176
}

libcxx/test/benchmarks/containers/ContainerBenchmarks.h

+104-14
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,13 @@
1010
#ifndef BENCHMARK_CONTAINER_BENCHMARKS_H
1111
#define BENCHMARK_CONTAINER_BENCHMARKS_H
1212

13+
#include <__type_traits/type_identity.h>
1314
#include <cassert>
1415
#include <iterator>
1516
#include <utility>
1617

1718
#include "benchmark/benchmark.h"
19+
#include "../../std/containers/from_range_helpers.h"
1820
#include "../Utilities.h"
1921
#include "test_iterators.h"
2022

@@ -51,16 +53,57 @@ void BM_Assignment(benchmark::State& st, Container) {
5153
}
5254
}
5355

56+
template <class Container, class Generator, class InputIter = decltype(std::declval<Generator>()(0).begin())>
57+
void BM_AssignIterIter(benchmark::State& st, Generator gen, InputIter = {}) {
58+
using T = typename Container::value_type;
59+
auto size = st.range(0);
60+
auto in1 = gen(size);
61+
auto in2 = gen(size);
62+
DoNotOptimizeData(in1);
63+
DoNotOptimizeData(in2);
64+
Container c(in1.begin(), in1.end());
65+
DoNotOptimizeData(c);
66+
bool toggle = false;
67+
for (auto _ : st) {
68+
std::vector<T>& in = toggle ? in1 : in2;
69+
auto first = in.begin();
70+
auto last = in.end();
71+
c.assign(InputIter(first), InputIter(last));
72+
toggle = !toggle;
73+
DoNotOptimizeData(c);
74+
}
75+
}
76+
77+
template <typename Container, class Generator, class Range = std::__type_identity_t<Container>>
78+
void BM_AssignRange(benchmark::State& st, Generator gen, Range = {}) {
79+
auto size = st.range(0);
80+
auto in1 = gen(size);
81+
auto in2 = gen(size);
82+
DoNotOptimizeData(in1);
83+
DoNotOptimizeData(in2);
84+
Range rg1(std::ranges::begin(in1), std::ranges::end(in1));
85+
Range rg2(std::ranges::begin(in2), std::ranges::end(in2));
86+
Container c(std::from_range, rg1);
87+
DoNotOptimizeData(c);
88+
bool toggle = false;
89+
for (auto _ : st) {
90+
auto& rg = toggle ? rg1 : rg2;
91+
c.assign_range(rg);
92+
toggle = !toggle;
93+
DoNotOptimizeData(c);
94+
}
95+
}
96+
5497
template <std::size_t... sz, typename Container, typename GenInputs>
5598
void BM_AssignInputIterIter(benchmark::State& st, Container c, GenInputs gen) {
5699
auto v = gen(1, sz...);
57100
c.resize(st.range(0), v[0]);
58101
auto in = gen(st.range(1), sz...);
59-
benchmark::DoNotOptimize(&in);
60-
benchmark::DoNotOptimize(&c);
102+
DoNotOptimizeData(in);
103+
DoNotOptimizeData(c);
61104
for (auto _ : st) {
62105
c.assign(cpp17_input_iterator(in.begin()), cpp17_input_iterator(in.end()));
63-
benchmark::ClobberMemory();
106+
DoNotOptimizeData(c);
64107
}
65108
}
66109

@@ -73,24 +116,25 @@ void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::
73116
}
74117
}
75118

76-
template <class Container, class GenInputs>
77-
void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
78-
auto in = gen(st.range(0));
79-
const auto begin = in.begin();
80-
const auto end = in.end();
81-
benchmark::DoNotOptimize(&in);
119+
template <class Container, class GenInputs, class InputIter = decltype(std::declval<GenInputs>()(0).begin())>
120+
void BM_ConstructIterIter(benchmark::State& st, GenInputs gen, InputIter = {}) {
121+
auto in = gen(st.range(0));
122+
DoNotOptimizeData(in);
123+
const auto begin = InputIter(in.begin());
124+
const auto end = InputIter(in.end());
82125
while (st.KeepRunning()) {
83-
Container c(begin, end);
126+
Container c(begin, end); // we assume the destructor doesn't dominate the benchmark
84127
DoNotOptimizeData(c);
85128
}
86129
}
87130

88-
template <class Container, class GenInputs>
89-
void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
131+
template <class Container, class GenInputs, class Range = std::__type_identity_t<Container>>
132+
void BM_ConstructFromRange(benchmark::State& st, GenInputs gen, Range = {}) {
90133
auto in = gen(st.range(0));
91-
benchmark::DoNotOptimize(&in);
134+
DoNotOptimizeData(in);
135+
Range rg(std::ranges::begin(in), std::ranges::end(in));
92136
while (st.KeepRunning()) {
93-
Container c(std::from_range, in);
137+
Container c(std::from_range, rg); // we assume the destructor doesn't dominate the benchmark
94138
DoNotOptimizeData(c);
95139
}
96140
}
@@ -108,6 +152,52 @@ void BM_Pushback_no_grow(benchmark::State& state, Container c) {
108152
}
109153
}
110154

155+
template <class Container, class GenInputs, class InputIter = decltype(std::declval<GenInputs>()(0).begin())>
156+
void BM_InsertIterIterIter(benchmark::State& st, GenInputs gen, InputIter = {}) {
157+
auto in = gen(st.range(0));
158+
DoNotOptimizeData(in);
159+
const auto beg = InputIter(in.begin());
160+
const auto end = InputIter(in.end());
161+
const unsigned size = 100;
162+
Container c(size);
163+
DoNotOptimizeData(c);
164+
for (auto _ : st) {
165+
c.insert(c.begin(), beg, end);
166+
DoNotOptimizeData(c);
167+
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
168+
}
169+
}
170+
171+
template <class Container, class GenInputs, class Range = std::__type_identity_t<Container>>
172+
void BM_InsertRange(benchmark::State& st, GenInputs gen, Range = {}) {
173+
auto in = gen(st.range(0));
174+
DoNotOptimizeData(in);
175+
Range rg(std::ranges::begin(in), std::ranges::end(in));
176+
const unsigned size = 100;
177+
Container c(size);
178+
DoNotOptimizeData(c);
179+
for (auto _ : st) {
180+
c.insert_range(c.begin(), rg);
181+
DoNotOptimizeData(c);
182+
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
183+
}
184+
}
185+
186+
template <class Container, class GenInputs, class Range = std::__type_identity_t<Container>>
187+
void BM_AppendRange(benchmark::State& st, GenInputs gen, Range = {}) {
188+
auto in = gen(st.range(0));
189+
DoNotOptimizeData(in);
190+
Range rg(std::ranges::begin(in), std::ranges::end(in));
191+
const unsigned size = 100;
192+
Container c(size);
193+
DoNotOptimizeData(c);
194+
for (auto _ : st) {
195+
c.append_range(rg);
196+
DoNotOptimizeData(c);
197+
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
198+
}
199+
}
200+
111201
template <class Container, class GenInputs>
112202
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
113203
auto in = gen(st.range(0));

libcxx/test/benchmarks/containers/deque.bench.cpp

+6-6
Original file line number | Diff line number | Diff line change
@@ -24,22 +24,22 @@ BENCHMARK_CAPTURE(BM_ConstructSize, deque_byte, std::deque<unsigned char>{})->Ar
2424

2525
BENCHMARK_CAPTURE(BM_ConstructSizeValue, deque_byte, std::deque<unsigned char>{}, 0)->Arg(5140480);
2626

27-
BENCHMARK_CAPTURE(BM_ConstructIterIter, deque_char, std::deque<char>{}, getRandomIntegerInputs<char>)
27+
BENCHMARK_CAPTURE(BM_ConstructIterIter<std::deque<char>>, deque_char, getRandomIntegerInputs<char>)
2828
->Arg(TestNumInputs);
2929

30-
BENCHMARK_CAPTURE(BM_ConstructIterIter, deque_size_t, std::deque<size_t>{}, getRandomIntegerInputs<size_t>)
30+
BENCHMARK_CAPTURE(BM_ConstructIterIter<std::deque<size_t>>, deque_size_t, getRandomIntegerInputs<size_t>)
3131
->Arg(TestNumInputs);
3232

33-
BENCHMARK_CAPTURE(BM_ConstructIterIter, deque_string, std::deque<std::string>{}, getRandomStringInputs)
33+
BENCHMARK_CAPTURE(BM_ConstructIterIter<std::deque<std::string>>, deque_string, getRandomStringInputs)
3434
->Arg(TestNumInputs);
3535

36-
BENCHMARK_CAPTURE(BM_ConstructFromRange, deque_char, std::deque<char>{}, getRandomIntegerInputs<char>)
36+
BENCHMARK_CAPTURE(BM_ConstructFromRange<std::deque<char>>, deque_char, getRandomIntegerInputs<char>)
3737
->Arg(TestNumInputs);
3838

39-
BENCHMARK_CAPTURE(BM_ConstructFromRange, deque_size_t, std::deque<size_t>{}, getRandomIntegerInputs<size_t>)
39+
BENCHMARK_CAPTURE(BM_ConstructFromRange<std::deque<size_t>>, deque_size_t, getRandomIntegerInputs<size_t>)
4040
->Arg(TestNumInputs);
4141

42-
BENCHMARK_CAPTURE(BM_ConstructFromRange, deque_string, std::deque<std::string>{}, getRandomStringInputs)
42+
BENCHMARK_CAPTURE(BM_ConstructFromRange<std::deque<std::string>>, deque_string, getRandomStringInputs)
4343
->Arg(TestNumInputs);
4444

4545
BENCHMARK_CAPTURE(BM_erase_iter_in_middle, deque_int, std::deque<int>{}, getRandomIntegerInputs<int>)

0 commit comments

Comments
 (0)