Skip to content

Commit 948da90

Browse files
committed
Speed-up range operations in vector<bool>
1 parent 4ea44eb commit 948da90

File tree

8 files changed

+399
-46
lines changed

8 files changed

+399
-46
lines changed

libcxx/include/__algorithm/copy.h

+80
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,13 @@
1313
#include <__algorithm/for_each_segment.h>
1414
#include <__algorithm/min.h>
1515
#include <__config>
16+
#include <__fwd/bit_reference.h>
17+
#include <__iterator/distance.h>
1618
#include <__iterator/iterator_traits.h>
1719
#include <__iterator/segmented_iterator.h>
1820
#include <__type_traits/common_type.h>
1921
#include <__type_traits/enable_if.h>
22+
#include <__type_traits/is_convertible.h>
2023
#include <__utility/move.h>
2124
#include <__utility/pair.h>
2225

@@ -32,6 +35,25 @@ _LIBCPP_BEGIN_NAMESPACE_STD
3235
template <class _InIter, class _Sent, class _OutIter>
3336
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
3437

38+
template <class _InIter,
39+
class _Sent,
40+
__enable_if_t<__has_input_iterator_category<_InIter>::value &&
41+
!__has_random_access_iterator_category<_InIter>::value,
42+
int> = 0>
43+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InIter>::difference_type
44+
__iter_sent_distance(_InIter __first, _Sent __last) {
45+
typename iterator_traits<_InIter>::difference_type __r(0);
46+
for (; __first != __last; ++__first)
47+
++__r;
48+
return __r;
49+
}
50+
51+
template <class _InIter, class _Sent, __enable_if_t<__has_random_access_iterator_category<_InIter>::value, int> = 0>
52+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InIter>::difference_type
53+
__iter_sent_distance(_InIter __first, _Sent __last) {
54+
return static_cast<typename iterator_traits<_InIter>::difference_type>(__last - __first);
55+
}
56+
3557
struct __copy_impl {
3658
template <class _InIter, class _Sent, class _OutIter>
3759
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
@@ -95,6 +117,64 @@ struct __copy_impl {
95117
}
96118
}
97119

120+
template <class _InIter,
121+
class _Sent,
122+
class _Cp,
123+
__enable_if_t<(__has_forward_iterator_category<_InIter>::value ||
124+
__has_iterator_concept_convertible_to<_InIter, forward_iterator_tag>::value) &&
125+
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
126+
int> = 0>
127+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, __bit_iterator<_Cp, false> >
128+
operator()(_InIter __first, _Sent __last, __bit_iterator<_Cp, false> __result) const {
129+
using _It = __bit_iterator<_Cp, false>;
130+
using __storage_type = typename _It::__storage_type;
131+
#if _LIBCPP_STD_VER >= 20
132+
__storage_type __n = static_cast<__storage_type>(std::ranges::distance(__first, __last));
133+
#else
134+
__storage_type __n = static_cast<__storage_type>(std::__iter_sent_distance(__first, __last));
135+
#endif
136+
const unsigned __bits_per_word = _It::__bits_per_word;
137+
138+
if (__first != __last) {
139+
// do first partial word, if present
140+
if (__result.__ctz_ != 0) {
141+
__storage_type __clz = static_cast<__storage_type>(__bits_per_word - __result.__ctz_);
142+
__storage_type __dn = std::min(__clz, __n);
143+
__storage_type __w = *__result.__seg_;
144+
__storage_type __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
145+
__w &= ~__m;
146+
for (__storage_type __i = 0; __i < __dn; ++__i, ++__first)
147+
__w |= static_cast<__storage_type>(*__first) << __result.__ctz_++;
148+
*__result.__seg_ = __w;
149+
if (__result.__ctz_ == __bits_per_word) {
150+
__result.__ctz_ = 0;
151+
++__result.__seg_;
152+
}
153+
__n -= __dn;
154+
}
155+
}
156+
// do middle whole words, if present
157+
__storage_type __nw = __n / __bits_per_word;
158+
__n -= __nw * __bits_per_word;
159+
for (; __nw; --__nw) {
160+
__storage_type __w = 0;
161+
for (__storage_type __i = 0; __i < __bits_per_word; ++__i, ++__first)
162+
__w |= static_cast<__storage_type>(*__first) << __i;
163+
*__result.__seg_++ = __w;
164+
}
165+
// do last partial word, if present
166+
if (__n) {
167+
__storage_type __w = 0;
168+
for (__storage_type __i = 0; __i < __n; ++__i, ++__first)
169+
__w |= static_cast<__storage_type>(*__first) << __i;
170+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
171+
*__result.__seg_ &= ~__m;
172+
*__result.__seg_ |= __w;
173+
__result.__ctz_ = __n;
174+
}
175+
return std::make_pair(std::move(__first), std::move(__result));
176+
}
177+
98178
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
99179
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
100180
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>

libcxx/include/__bit_reference

+14
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef _LIBCPP___BIT_REFERENCE
1111
#define _LIBCPP___BIT_REFERENCE
1212

13+
#include <__algorithm/copy.h>
1314
#include <__algorithm/copy_n.h>
1415
#include <__algorithm/min.h>
1516
#include <__bit/countr.h>
@@ -22,8 +23,11 @@
2223
#include <__memory/construct_at.h>
2324
#include <__memory/pointer_traits.h>
2425
#include <__type_traits/conditional.h>
26+
#include <__type_traits/enable_if.h>
2527
#include <__type_traits/is_constant_evaluated.h>
28+
#include <__type_traits/is_convertible.h>
2629
#include <__type_traits/void_t.h>
30+
#include <__utility/pair.h>
2731
#include <__utility/swap.h>
2832

2933
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -978,6 +982,16 @@ private:
978982
template <class _Dp>
979983
friend struct __bit_array;
980984

985+
// Befriends the __copy_impl::operator() overload that copies a range of bool-convertible values
// to a mutable bit iterator (__bit_iterator<_Dp, false>). The template header and constraint
// mirror that overload's definition; the default argument (`= 0`) is omitted because this is a
// redeclaration.
// NOTE(review): presumably required because that overload reads and writes __seg_ and __ctz_
// directly -- confirm those members are private in this class.
template <class _InIter,
986+
class _Sent,
987+
class _Dp,
988+
__enable_if_t<(__has_forward_iterator_category<_InIter>::value ||
989+
__has_iterator_concept_convertible_to<_InIter, forward_iterator_tag>::value) &&
990+
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
991+
int> >
992+
_LIBCPP_CONSTEXPR_SINCE_CXX14 friend pair<_InIter, __bit_iterator<_Dp, false> >
993+
__copy_impl::operator()(_InIter __first, _Sent __last, __bit_iterator<_Dp, false> __result) const;
994+
981995
template <bool _FillVal, class _Dp>
982996
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend void
983997
__fill_n_bool(__bit_iterator<_Dp, false> __first, typename __size_difference_type_traits<_Dp>::size_type __n);

libcxx/include/__vector/vector_bool.h

-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
171171
if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
172172
auto __n = static_cast<size_type>(ranges::distance(__range));
173173
__init_with_size(ranges::begin(__range), ranges::end(__range), __n);
174-
175174
} else {
176175
__init_with_sentinel(ranges::begin(__range), ranges::end(__range));
177176
}

libcxx/test/benchmarks/containers/ContainerBenchmarks.h

+105-14
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
#ifndef BENCHMARK_CONTAINER_BENCHMARKS_H
1111
#define BENCHMARK_CONTAINER_BENCHMARKS_H
1212

13+
#include <__type_traits/type_identity.h>
1314
#include <cassert>
1415
#include <iterator>
1516
#include <utility>
1617

1718
#include "benchmark/benchmark.h"
19+
#include "../../std/containers/from_range_helpers.h"
1820
#include "../Utilities.h"
1921
#include "test_iterators.h"
2022

@@ -51,16 +53,57 @@ void BM_Assignment(benchmark::State& st, Container) {
5153
}
5254
}
5355

56+
template <class Container, class Generator, class InputIter = std::__type_identity_t<typename Container::iterator>>
57+
void BM_AssignIterIter(benchmark::State& st, Generator gen, InputIter = {}) {
58+
using T = typename Container::value_type;
59+
auto size = st.range(0);
60+
auto in1 = gen(size);
61+
auto in2 = gen(size);
62+
DoNotOptimizeData(in1);
63+
DoNotOptimizeData(in2);
64+
Container c(in1.begin(), in1.end());
65+
DoNotOptimizeData(c);
66+
bool toggle = false;
67+
for (auto _ : st) {
68+
std::vector<T>& in = toggle ? in1 : in2;
69+
auto first = in.begin();
70+
auto last = in.end();
71+
c.assign(InputIter(first), InputIter(last));
72+
toggle = !toggle;
73+
DoNotOptimizeData(c);
74+
}
75+
}
76+
77+
template <typename Container, class Generator, class Range = std::__type_identity_t<Container>>
78+
void BM_AssignRange(benchmark::State& st, Generator gen, Range = {}) {
79+
auto size = st.range(0);
80+
auto in1 = gen(size);
81+
auto in2 = gen(size);
82+
DoNotOptimizeData(in1);
83+
DoNotOptimizeData(in2);
84+
Range rg1(std::ranges::begin(in1), std::ranges::end(in1));
85+
Range rg2(std::ranges::begin(in2), std::ranges::end(in2));
86+
Container c(std::from_range, rg1);
87+
DoNotOptimizeData(c);
88+
bool toggle = false;
89+
for (auto _ : st) {
90+
auto& rg = toggle ? rg1 : rg2;
91+
c.assign_range(rg);
92+
toggle = !toggle;
93+
DoNotOptimizeData(c);
94+
}
95+
}
96+
5497
// Benchmarks `c.assign(first, last)` where both iterators are wrapped in `cpp17_input_iterator`.
//
// `c` is resized to `st.range(0)` copies of the first element of `gen(1, sz...)`, and the source
// sequence is `gen(st.range(1), sz...)`.
//
// NOTE: this span is a rendered diff, not a single version of the function. A statement that
// follows an `N-` marker line is the pre-commit text, and one that follows an `N+` marker line is
// its replacement.
template <std::size_t... sz, typename Container, typename GenInputs>
5598
void BM_AssignInputIterIter(benchmark::State& st, Container c, GenInputs gen) {
5699
auto v = gen(1, sz...);
57100
c.resize(st.range(0), v[0]);
58101
auto in = gen(st.range(1), sz...);
59-
benchmark::DoNotOptimize(&in);
60-
benchmark::DoNotOptimize(&c);
102+
DoNotOptimizeData(in);
103+
DoNotOptimizeData(c);
61104
for (auto _ : st) {
62105
c.assign(cpp17_input_iterator(in.begin()), cpp17_input_iterator(in.end()));
63-
benchmark::ClobberMemory();
106+
DoNotOptimizeData(c);
64107
}
65108
}
66109

@@ -73,24 +116,26 @@ void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::
73116
}
74117
}
75118

76-
// Benchmarks constructing a `Container` from an iterator pair over `gen(st.range(0))`.
//
// NOTE: this span is a rendered diff, not a single version of the function. A line that follows
// an `N-` marker line is the pre-commit text, and one that follows an `N+` marker line is its
// replacement. In the pre-commit signature the container type is deduced from an unused
// `Container` argument and the container's own `begin()`/`end()` iterators are used directly. In
// the post-commit signature `Container` must be given explicitly and the iterators are wrapped in
// `InputIter`, which is deduced from the unused trailing argument.
template <class Container, class GenInputs>
77-
void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
78-
auto in = gen(st.range(0));
79-
const auto begin = in.begin();
80-
const auto end = in.end();
81-
benchmark::DoNotOptimize(&in);
119+
template <class Container, class GenInputs, class InputIter = std::__type_identity_t<typename Container::iterator>>
120+
void BM_ConstructIterIter(benchmark::State& st, GenInputs gen, InputIter = {}) {
121+
auto in = gen(st.range(0));
122+
DoNotOptimizeData(in);
123+
const auto begin = InputIter(in.begin());
124+
const auto end = InputIter(in.end());
82125
while (st.KeepRunning()) {
83-
Container c(begin, end);
126+
Container c(begin, end); // we assume the destructor doesn't dominate the benchmark
84127
DoNotOptimizeData(c);
85128
}
86129
}
87130

88-
// Benchmarks constructing a `Container` with the `std::from_range` constructor.
//
// NOTE: this span is a rendered diff, not a single version of the function. A line that follows
// an `N-` marker line is the pre-commit text, and one that follows an `N+` marker line is its
// replacement. The pre-commit version passes the generated input `in` itself as the source range.
// The post-commit version wraps it in a `Range` built from `std::ranges::begin(in)` and
// `std::ranges::end(in)`, with `Range` deduced from the unused trailing argument.
template <class Container, class GenInputs>
89-
void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
131+
template <class Container, class GenInputs, class Range = std::__type_identity_t<Container>>
132+
void BM_ConstructFromRange(benchmark::State& st, GenInputs gen, Range = {}) {
90133
auto in = gen(st.range(0));
91-
benchmark::DoNotOptimize(&in);
134+
DoNotOptimizeData(in);
135+
Range rg(std::ranges::begin(in), std::ranges::end(in));
136+
// Range rg(in.begin(), in.end());
92137
while (st.KeepRunning()) {
93-
Container c(std::from_range, in);
138+
Container c(std::from_range, rg); // we assume the destructor doesn't dominate the benchmark
94139
DoNotOptimizeData(c);
95140
}
96141
}
@@ -108,6 +153,52 @@ void BM_Pushback_no_grow(benchmark::State& state, Container c) {
108153
}
109154
}
110155

156+
template <class Container, class GenInputs, class InputIter = std::__type_identity_t<typename Container::iterator>>
157+
void BM_InsertIterIterIter(benchmark::State& st, GenInputs gen, InputIter = {}) {
158+
auto in = gen(st.range(0));
159+
DoNotOptimizeData(in);
160+
const auto beg = InputIter(in.begin());
161+
const auto end = InputIter(in.end());
162+
const unsigned size = 100;
163+
Container c(size);
164+
DoNotOptimizeData(c);
165+
for (auto _ : st) {
166+
c.insert(c.begin(), beg, end);
167+
DoNotOptimizeData(c);
168+
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
169+
}
170+
}
171+
172+
template <class Container, class GenInputs, class Range = std::__type_identity_t<Container>>
173+
void BM_InsertRange(benchmark::State& st, GenInputs gen, Range = {}) {
174+
auto in = gen(st.range(0));
175+
DoNotOptimizeData(in);
176+
Range rg(std::ranges::begin(in), std::ranges::end(in));
177+
const unsigned size = 100;
178+
Container c(size);
179+
DoNotOptimizeData(c);
180+
for (auto _ : st) {
181+
c.insert_range(c.begin(), rg);
182+
DoNotOptimizeData(c);
183+
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
184+
}
185+
}
186+
187+
template <class Container, class GenInputs, class Range = std::__type_identity_t<Container>>
188+
void BM_AppendRange(benchmark::State& st, GenInputs gen, Range = {}) {
189+
auto in = gen(st.range(0));
190+
DoNotOptimizeData(in);
191+
Range rg(std::ranges::begin(in), std::ranges::end(in));
192+
const unsigned size = 100;
193+
Container c(size);
194+
DoNotOptimizeData(c);
195+
for (auto _ : st) {
196+
c.append_range(rg);
197+
DoNotOptimizeData(c);
198+
c.erase(c.begin() + size, c.end()); // avoid growing indefinitely
199+
}
200+
}
201+
111202
template <class Container, class GenInputs>
112203
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
113204
auto in = gen(st.range(0));

0 commit comments

Comments
 (0)