Skip to content

Commit 7bef800

Browse files
committed
Speed up range operations in vector<bool>
1 parent 4e2efc3 commit 7bef800

File tree

5 files changed

+195
-0
lines changed

5 files changed

+195
-0
lines changed

libcxx/include/__algorithm/copy.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,13 @@
1313
#include <__algorithm/for_each_segment.h>
1414
#include <__algorithm/min.h>
1515
#include <__config>
16+
#include <__fwd/bit_reference.h>
17+
#include <__iterator/distance.h>
1618
#include <__iterator/iterator_traits.h>
1719
#include <__iterator/segmented_iterator.h>
1820
#include <__type_traits/common_type.h>
1921
#include <__type_traits/enable_if.h>
22+
#include <__type_traits/is_convertible.h>
2023
#include <__utility/move.h>
2124
#include <__utility/pair.h>
2225

@@ -95,6 +98,58 @@ struct __copy_impl {
9598
}
9699
}
97100

101+
template <class _InIter,
102+
class _Cp,
103+
__enable_if_t<__has_forward_iterator_category<_InIter>::value &&
104+
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
105+
int> = 0>
106+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, __bit_iterator<_Cp, false> >
107+
operator()(_InIter __first, _InIter __last, __bit_iterator<_Cp, false> __result) const {
108+
using _It = __bit_iterator<_Cp, false>;
109+
using __storage_type = typename _It::__storage_type;
110+
__storage_type __n = static_cast<__storage_type>(std::distance(__first, __last));
111+
const unsigned __bits_per_word = _It::__bits_per_word;
112+
113+
if (__n) {
114+
// do first partial word, if present
115+
if (__result.__ctz_ != 0) {
116+
__storage_type __clz = static_cast<__storage_type>(__bits_per_word - __result.__ctz_);
117+
__storage_type __dn = std::min(__clz, __n);
118+
__storage_type __w = *__result.__seg_;
119+
__storage_type __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
120+
__w &= ~__m;
121+
for (__storage_type __i = 0; __i < __dn; ++__i, ++__first)
122+
__w |= static_cast<__storage_type>(*__first) << __result.__ctz_++;
123+
*__result.__seg_ = __w;
124+
if (__result.__ctz_ == __bits_per_word) {
125+
__result.__ctz_ = 0;
126+
++__result.__seg_;
127+
}
128+
__n -= __dn;
129+
}
130+
}
131+
// do middle whole words, if present
132+
__storage_type __nw = __n / __bits_per_word;
133+
__n -= __nw * __bits_per_word;
134+
for (; __nw; --__nw) {
135+
__storage_type __w = 0;
136+
for (__storage_type __i = 0; __i < __bits_per_word; ++__i, ++__first)
137+
__w |= static_cast<__storage_type>(*__first) << __i;
138+
*__result.__seg_++ = __w;
139+
}
140+
// do last partial word, if present
141+
if (__n) {
142+
__storage_type __w = 0;
143+
for (__storage_type __i = 0; __i < __n; ++__i, ++__first)
144+
__w |= static_cast<__storage_type>(*__first) << __i;
145+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
146+
*__result.__seg_ &= ~__m;
147+
*__result.__seg_ |= __w;
148+
__result.__ctz_ = __n;
149+
}
150+
return std::make_pair(std::move(__first), std::move(__result));
151+
}
152+
98153
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
99154
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
100155
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>

libcxx/include/__bit_reference

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef _LIBCPP___BIT_REFERENCE
1111
#define _LIBCPP___BIT_REFERENCE
1212

13+
#include <__algorithm/copy.h>
1314
#include <__algorithm/copy_n.h>
1415
#include <__algorithm/min.h>
1516
#include <__bit/countr.h>
@@ -22,8 +23,11 @@
2223
#include <__memory/construct_at.h>
2324
#include <__memory/pointer_traits.h>
2425
#include <__type_traits/conditional.h>
26+
#include <__type_traits/enable_if.h>
2527
#include <__type_traits/is_constant_evaluated.h>
28+
#include <__type_traits/is_convertible.h>
2629
#include <__type_traits/void_t.h>
30+
#include <__utility/pair.h>
2731
#include <__utility/swap.h>
2832

2933
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -978,6 +982,14 @@ private:
978982
template <class _Dp>
979983
friend struct __bit_array;
980984

985+
// Befriends the __copy_impl::operator() overload that takes a forward-iterator pair and a non-const
// __bit_iterator output, giving it access to the private members of the enclosing class (that overload
// reads and writes __seg_ and __ctz_ of its output iterator directly).
// The template parameter list mirrors the overload's definition, except that the trailing enable_if
// parameter carries no default argument in this declaration.
// NOTE(review): the enclosing class is presumably __bit_iterator; its header is outside this hunk.
template <class _InIter,
986+
class _Dp,
987+
__enable_if_t<__has_forward_iterator_category<_InIter>::value &&
988+
is_convertible<typename iterator_traits<_InIter>::value_type, bool>::value,
989+
int> >
990+
_LIBCPP_CONSTEXPR_SINCE_CXX14 friend pair<_InIter, __bit_iterator<_Dp, false> >
991+
__copy_impl::operator()(_InIter __first, _InIter __last, __bit_iterator<_Dp, false> __result) const;
992+
981993
template <bool _FillVal, class _Dp>
982994
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend void
983995
__fill_n_bool(__bit_iterator<_Dp, false> __first, typename __size_difference_type_traits<_Dp>::size_type __n);

libcxx/test/benchmarks/containers/ContainerBenchmarks.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,30 @@ void BM_Assignment(benchmark::State& st, Container) {
5151
}
5252
}
5353

54+
template <class Container, class GenInputs>
55+
void BM_assign_iter_iter(benchmark::State& st, Container c, GenInputs gen) {
56+
auto in = gen(st.range(0));
57+
auto beg = in.begin();
58+
auto end = in.end();
59+
for (auto _ : st) {
60+
c.assign(beg, end);
61+
DoNotOptimizeData(c);
62+
DoNotOptimizeData(in);
63+
benchmark::ClobberMemory();
64+
}
65+
}
66+
67+
template <std::size_t... sz, typename Container, typename GenInputs>
68+
void BM_assign_range(benchmark::State& st, Container c, GenInputs gen) {
69+
auto in = gen(st.range(0));
70+
for (auto _ : st) {
71+
c.assign_range(in);
72+
DoNotOptimizeData(c);
73+
DoNotOptimizeData(in);
74+
benchmark::ClobberMemory();
75+
}
76+
}
77+
5478
template <std::size_t... sz, typename Container, typename GenInputs>
5579
void BM_AssignInputIterIter(benchmark::State& st, Container c, GenInputs gen) {
5680
auto v = gen(1, sz...);
@@ -108,6 +132,40 @@ void BM_Pushback_no_grow(benchmark::State& state, Container c) {
108132
}
109133
}
110134

135+
template <class Container, class GenInputs>
136+
void BM_insert_iter_iter_iter(benchmark::State& st, Container c, GenInputs gen) {
137+
auto in = gen(st.range(0));
138+
const auto beg = in.begin();
139+
const auto end = in.end();
140+
for (auto _ : st) {
141+
c.resize(100);
142+
c.insert(c.begin() + 50, beg, end);
143+
DoNotOptimizeData(c);
144+
benchmark::ClobberMemory();
145+
}
146+
}
147+
148+
template <class Container, class GenInputs>
149+
void BM_insert_range(benchmark::State& st, Container c, GenInputs gen) {
150+
auto in = gen(st.range(0));
151+
for (auto _ : st) {
152+
c.resize(100);
153+
c.insert_range(c.begin() + 50, in);
154+
DoNotOptimizeData(c);
155+
benchmark::ClobberMemory();
156+
}
157+
}
158+
159+
template <class Container, class GenInputs>
160+
void BM_append_range(benchmark::State& st, Container c, GenInputs gen) {
161+
auto in = gen(st.range(0));
162+
for (auto _ : st) {
163+
c.append_range(in);
164+
DoNotOptimizeData(c);
165+
benchmark::ClobberMemory();
166+
}
167+
}
168+
111169
template <class Container, class GenInputs>
112170
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
113171
auto in = gen(st.range(0));
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
10+
11+
#include <cstdint>
12+
#include <cstdlib>
13+
#include <cstring>
14+
#include <deque>
15+
#include <functional>
16+
#include <memory>
17+
#include <string>
18+
#include <vector>
19+
20+
#include "benchmark/benchmark.h"
21+
#include "ContainerBenchmarks.h"
22+
#include "../GenerateInput.h"
23+
24+
using namespace ContainerBenchmarks;
25+
26+
// Registers the std::vector<bool> range-operation benchmarks. Every registration captures an empty
// std::vector<bool> as the container and getRandomIntegerInputs<bool> as the input generator, and passes
// 5140480 as the single benchmark argument (the assign/insert/append benchmarks read it via st.range(0)
// and hand it to the generator).
// NOTE(review): the rationale for the specific value 5140480 is not visible here.
BENCHMARK_CAPTURE(BM_ConstructIterIter, vector_bool, std::vector<bool>{}, getRandomIntegerInputs<bool>)->Arg(5140480);
27+
BENCHMARK_CAPTURE(BM_ConstructFromRange, vector_bool, std::vector<bool>{}, getRandomIntegerInputs<bool>)->Arg(5140480);
28+
29+
BENCHMARK_CAPTURE(BM_assign_iter_iter, vector_bool, std::vector<bool>{}, getRandomIntegerInputs<bool>)->Arg(5140480);
30+
BENCHMARK_CAPTURE(BM_assign_range, vector_bool, std::vector<bool>{}, getRandomIntegerInputs<bool>)->Arg(5140480);
31+
32+
BENCHMARK_CAPTURE(BM_insert_iter_iter_iter, vector_bool, std::vector<bool>{}, getRandomIntegerInputs<bool>)
33+
->Arg(5140480);
34+
BENCHMARK_CAPTURE(BM_insert_range, vector_bool, std::vector<bool>{}, getRandomIntegerInputs<bool>)->Arg(5140480);
35+
BENCHMARK_CAPTURE(BM_append_range, vector_bool, std::vector<bool>{}, getRandomIntegerInputs<bool>)->Arg(5140480);
36+
37+
// Expands to the benchmark executable's main().
BENCHMARK_MAIN();

libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
// copy(InIter first, InIter last, OutIter result);
1414

1515
#include <algorithm>
16+
#include <array>
1617
#include <cassert>
18+
#include <vector>
1719

1820
#include "test_macros.h"
1921
#include "test_iterators.h"
@@ -59,6 +61,29 @@ struct TestInIters {
5961
}
6062
};
6163

64+
template <std::size_t N>
65+
struct TestFwdIterInBitIterOut {
66+
std::array<bool, N> in = {};
67+
template <class FwdIter>
68+
TEST_CONSTEXPR_CXX20 void operator()() {
69+
for (std::size_t i = 0; i < in.size(); i += 2)
70+
in[i] = true;
71+
72+
{ // Test with full bytes
73+
std::vector<bool> out(N);
74+
std::copy(FwdIter(in.data()), FwdIter(in.data() + N), out.begin());
75+
for (std::size_t i = 0; i < N; ++i)
76+
assert(out[i] == static_cast<bool>(in[i]));
77+
}
78+
{ // Test with partial bytes in both front and back
79+
std::vector<bool> out(N + 8);
80+
std::copy(FwdIter(in.data()), FwdIter(in.data() + N), out.begin() + 4);
81+
for (std::size_t i = 0; i < N; ++i)
82+
assert(out[i + 4] == static_cast<bool>(in[i]));
83+
}
84+
}
85+
};
86+
6287
TEST_CONSTEXPR_CXX20 bool test() {
6388
types::for_each(types::cpp17_input_iterator_list<int*>(), TestInIters());
6489

@@ -78,6 +103,14 @@ TEST_CONSTEXPR_CXX20 bool test() {
78103
assert(std::equal(a, a + 10, expected));
79104
}
80105

106+
{ // Test std::copy() with forward_iterator-pair input and vector<bool>::iterator output
107+
types::for_each(types::forward_iterator_list<bool*>(), TestFwdIterInBitIterOut<8>());
108+
types::for_each(types::forward_iterator_list<bool*>(), TestFwdIterInBitIterOut<16>());
109+
types::for_each(types::forward_iterator_list<bool*>(), TestFwdIterInBitIterOut<32>());
110+
types::for_each(types::forward_iterator_list<bool*>(), TestFwdIterInBitIterOut<64>());
111+
types::for_each(types::forward_iterator_list<bool*>(), TestFwdIterInBitIterOut<256>());
112+
}
113+
81114
return true;
82115
}
83116

0 commit comments

Comments
 (0)