-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PR: [libc++] Speed-up input_range based operations in vector<bool> #124188
base: main
Are you sure you want to change the base?
Conversation
522b0a9
to
8a3f8ee
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
8a3f8ee
to
18aab0b
Compare
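@llvm/pr-subscribers-libcxx Author: Peng Liu (winner245) Changes: As a follow-up to #120134 (which improved the performance of range-based operations in vector<bool> for forward or higher ranges), this PR extends those improvements to input-only and unsized ranges.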
Before:
After:
Full diff: https://github.com/llvm/llvm-project/pull/124188.diff 5 Files Affected:
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 4f1c442ce0be8d..c04498626abe6c 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -411,8 +411,7 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
__init_with_sentinel(_InputIterator __first, _Sentinel __last) {
auto __guard = std::__make_exception_guard(__destroy_vector(*this));
- for (; __first != __last; ++__first)
- push_back(*__first);
+ __push_back_words_with_sentinel(std::move(__first), std::move(__last));
__guard.__complete();
}
@@ -509,6 +508,10 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __move_assign_alloc(vector&, false_type) _NOEXCEPT {}
+ template <class _InputIterator, class _Sentinel>
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+ __push_back_words_with_sentinel(_InputIterator __first, _Sentinel __last);
+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_t __hash_code() const _NOEXCEPT;
friend class __bit_reference<vector>;
@@ -820,8 +823,7 @@ template <class _Iterator, class _Sentinel>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
vector<bool, _Allocator>::__assign_with_sentinel(_Iterator __first, _Sentinel __last) {
clear();
- for (; __first != __last; ++__first)
- push_back(*__first);
+ __push_back_words_with_sentinel(std::move(__first), std::move(__last));
}
template <class _Allocator>
@@ -1084,6 +1086,35 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::flip() _NOEXCEPT {
*__p = ~*__p;
}
+// Appends the bits of the input range [__first, __last) to the vector, one storage word at a time.
+//
+// Precondition: the size of the vector must be a multiple of `__bits_per_word`, i.e. the vector
+// currently ends on a word boundary, so every newly assembled word can be stored whole.
+//
+// Each outer iteration reads up to `__bits_per_word` elements from the range into a local word
+// (the i-th element read sets bit i), reserves more storage when size() == capacity(), and then
+// writes the word into the next free slot. `__size_` is advanced by the number of bits actually
+// read, so only the last word written can be partially filled.
+template <class _Allocator>
+template <class _InputIterator, class _Sentinel>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+vector<bool, _Allocator>::__push_back_words_with_sentinel(_InputIterator __first, _Sentinel __last) {
+  _LIBCPP_ASSERT_VALID_INPUT_RANGE(
+      this->__size_ % __bits_per_word == 0,
+      "vector<bool>::__push_back_words_with_sentinel called with a size that is not a multiple of __bits_per_word");
+  // The word index is derived from `__size_`, so it must be a `size_type`: a plain `unsigned`
+  // would truncate on targets where `size_type` is wider, and the store below would hit the wrong word.
+  size_type __n_words = this->__size_ / __bits_per_word;
+  while (__first != __last) {
+    __storage_type __w = 0;
+    unsigned __ctz     = 0; // number of bits gathered into __w so far
+    for (; __ctz != __bits_per_word && __first != __last; ++__ctz, (void)++__first) {
+      if (*__first)
+        __w |= static_cast<__storage_type>(static_cast<__storage_type>(1) << __ctz);
+    }
+    // Grow before storing: the word index is recomputed against the (possibly new) `__begin_`.
+    if (this->__size_ == this->capacity())
+      reserve(__recommend(this->__size_ + 1));
+    this->__begin_[__n_words++] = __w;
+    this->__size_ += __ctz;
+  }
+}
+
template <class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 bool vector<bool, _Allocator>::__invariants() const {
if (this->__begin_ == nullptr) {
diff --git a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
index 5fc8981619672c..7a3aade34801b4 100644
--- a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
+++ b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
@@ -15,6 +15,7 @@
#include <utility>
#include "benchmark/benchmark.h"
+#include "../../std/containers/from_range_helpers.h"
#include "../Utilities.h"
#include "test_iterators.h"
@@ -51,16 +52,42 @@ void BM_Assignment(benchmark::State& st, Container) {
}
}
-template <std::size_t... sz, typename Container, typename GenInputs>
-void BM_AssignInputIterIter(benchmark::State& st, Container c, GenInputs gen) {
- auto v = gen(1, sz...);
- c.resize(st.range(0), v[0]);
- auto in = gen(st.range(1), sz...);
- benchmark::DoNotOptimize(&in);
- benchmark::DoNotOptimize(&c);
+template <typename Container, class Generator> // Benchmarks Container::assign(first, last) with cpp17_input_iterator arguments.
+void BM_AssignInputIterIter(benchmark::State& st, Generator gen) { // `gen(n)` supplies the input data; n comes from st.range(0).
+ using T = typename Container::value_type;
+ auto size = st.range(0);
+ auto in1 = gen(size); // two inputs of the same size, both produced by `gen`
+ auto in2 = gen(size);
+ DoNotOptimizeData(in1);
+ DoNotOptimizeData(in2);
+ Container c(in1.begin(), in1.end()); // container starts out holding in1's contents
+ bool toggle = false;
for (auto _ : st) {
- c.assign(cpp17_input_iterator(in.begin()), cpp17_input_iterator(in.end()));
- benchmark::ClobberMemory();
+ std::vector<T>& in = toggle ? in1 : in2; // alternate the source each iteration (in2 first, since toggle starts false)
+ auto first = in.begin();
+ auto last = in.end();
+ c.assign(cpp17_input_iterator(first), cpp17_input_iterator(last)); // the operation being measured
+ toggle = !toggle;
+ DoNotOptimizeData(c);
+ }
+}
+
+template <typename Container, class Generator> // Benchmarks Container::assign_range() fed from input_only_range objects.
+void BM_AssignInputRange(benchmark::State& st, Generator gen) { // `gen(n)` supplies the input data; n comes from st.range(0).
+ auto size = st.range(0);
+ auto in1 = gen(size); // two inputs of the same size, both produced by `gen`
+ auto in2 = gen(size);
+ DoNotOptimizeData(in1);
+ DoNotOptimizeData(in2);
+ input_only_range rg1(std::ranges::begin(in1), std::ranges::end(in1)); // wrap each input in an input_only_range
+ input_only_range rg2(std::ranges::begin(in2), std::ranges::end(in2));
+ Container c(std::from_range, rg1); // container starts out holding in1's contents
+ bool toggle = false;
+ for (auto _ : st) {
+ auto& rg = toggle ? rg1 : rg2; // alternate the source each iteration (rg2 first); each range object is reused across iterations
+ c.assign_range(rg); // the operation being measured
+ toggle = !toggle;
+ DoNotOptimizeData(c);
}
}
@@ -85,6 +112,18 @@ void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
}
}
+// Benchmarks constructing `Container` from a pair of `cpp17_input_iterator`s that wrap the
+// begin/end of the data returned by `gen(st.range(0))`. A fresh container is built on every
+// iteration from the same iterator pair.
+template <class Container, class GenInputs>
+void BM_ConstructInputIterIter(benchmark::State& st, GenInputs gen) {
+  auto in = gen(st.range(0));
+  DoNotOptimizeData(in);
+  const auto beg = cpp17_input_iterator(in.begin());
+  const auto end = cpp17_input_iterator(in.end());
+  // Range-for over the state, matching the other benchmarks added alongside this one.
+  for (auto _ : st) {
+    Container c(beg, end);
+    DoNotOptimizeData(c);
+  }
+}
+
template <class Container, class GenInputs>
void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
auto in = gen(st.range(0));
@@ -95,6 +134,17 @@ void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
}
}
+// Benchmarks the `Container(std::from_range, rg)` constructor, where `rg` is an
+// `input_only_range` over the data returned by `gen(st.range(0))`. A fresh container is built
+// on every iteration from the same range object.
+template <class Container, class GenInputs>
+void BM_ConstructFromInputRange(benchmark::State& st, GenInputs gen) {
+  auto in = gen(st.range(0));
+  DoNotOptimizeData(in);
+  input_only_range rg(std::ranges::begin(in), std::ranges::end(in));
+  // Range-for over the state, matching the other benchmarks added alongside this one.
+  for (auto _ : st) {
+    Container c(std::from_range, rg);
+    DoNotOptimizeData(c);
+  }
+}
+
template <class Container>
void BM_Pushback_no_grow(benchmark::State& state, Container c) {
int count = state.range(0);
diff --git a/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
new file mode 100644
index 00000000000000..b619fbea7b2b93
--- /dev/null
+++ b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "ContainerBenchmarks.h"
+#include "../GenerateInput.h"
+
+using namespace ContainerBenchmarks;
+
+BENCHMARK_CAPTURE(BM_ConstructInputIterIter<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+BENCHMARK_CAPTURE(BM_ConstructFromInputRange<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+BENCHMARK_CAPTURE(BM_AssignInputRange<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/benchmarks/containers/vector_operations.bench.cpp b/libcxx/test/benchmarks/containers/vector_operations.bench.cpp
index 1cd754ca7e7803..3261cf3afab8bb 100644
--- a/libcxx/test/benchmarks/containers/vector_operations.bench.cpp
+++ b/libcxx/test/benchmarks/containers/vector_operations.bench.cpp
@@ -78,18 +78,11 @@ BENCHMARK(bm_grow<std::string>);
BENCHMARK(bm_grow<std::unique_ptr<int>>);
BENCHMARK(bm_grow<std::deque<int>>);
-BENCHMARK_CAPTURE(BM_AssignInputIterIter, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
- ->Args({TestNumInputs, TestNumInputs});
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<std::vector<int>>, vector_int, getRandomIntegerInputs<int>)
+ ->Arg(TestNumInputs);
-BENCHMARK_CAPTURE(
- BM_AssignInputIterIter<32>, vector_string, std::vector<std::string>{}, getRandomStringInputsWithLength)
- ->Args({TestNumInputs, TestNumInputs});
-
-BENCHMARK_CAPTURE(BM_AssignInputIterIter<100>,
- vector_vector_int,
- std::vector<std::vector<int>>{},
- getRandomIntegerInputsWithLength<int>)
- ->Args({TestNumInputs, TestNumInputs});
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<std::vector<std::string>>, vector_string, getRandomStringInputs)
+ ->Arg(TestNumInputs);
BENCHMARK_CAPTURE(BM_Insert_InputIterIter_NoRealloc, vector_int, std::vector<int>(100, 1), getRandomIntegerInputs<int>)
->Arg(514048);
diff --git a/libcxx/test/std/containers/from_range_helpers.h b/libcxx/test/std/containers/from_range_helpers.h
index e17ea247618bc2..f98fddcf29d525 100644
--- a/libcxx/test/std/containers/from_range_helpers.h
+++ b/libcxx/test/std/containers/from_range_helpers.h
@@ -50,6 +50,24 @@ constexpr auto wrap_input(std::vector<T>& input) {
return std::ranges::subrange(std::move(b), std::move(e));
}
+// A range over [begin, end) whose iterator type is `cpp20_input_iterator<It>` and whose sentinel
+// type is `sentinel_wrapper` around that iterator.
+//
+// begin() and end() return wrappers built from *copies* of the stored iterators, so the stored
+// iterators are never left in a moved-from state and the same object can be traversed more than
+// once (the benchmarks call assign_range() on one instance repeatedly). `It` must be copyable.
+template <class It>
+class input_only_range {
+public:
+  using Iter = cpp20_input_iterator<It>;
+  using Sent = sentinel_wrapper<Iter>;
+
+  input_only_range(It begin, It end) : begin_(std::move(begin)), end_(std::move(end)) {}
+  Iter begin() { return Iter(begin_); }
+  Sent end() { return Sent(Iter(end_)); }
+
+private:
+  It begin_;
+  It end_;
+};
+
+// Deduce `It` from a matching pair of iterators.
+template <class It>
+input_only_range(It, It) -> input_only_range<It>;
+
struct KeyValue {
int key; // Only the key is considered for equality comparison.
char value; // Allows distinguishing equivalent instances.
|
This PR complements the optimizations for forward (or higher) ranges in #120134 by extending the improvements to input-only and unsized ranges for range-based operations in `vector<bool>`. The improvement is based on the same optimization technique as in #120134: instead of processing individual bits as in the previous implementation, the new implementation first collects bits into full storage words and then processes entire words.
The optimizations result in performance improvements of up to 5.5x for assignment functions with `input_{range, iterator}` inputs and up to 1.9x for constructors with `input_{range, iterator}` inputs. The affected functions are:
- `assign_range(R&& rg)`
- `assign(InputIt first, InputIt last)`
- `vector(std::from_range_t, R&& rg, const Allocator& alloc)`
- `vector(InputIt first, InputIt last, const Allocator& alloc)`
Before:
After:
Note: The constructors exhibit less performance improvement than the assignment functions because of slow reallocation operations with unsized `input_range`s or `input_iterator`-pair inputs, which slightly offset the optimization benefits.