From 9ee0dc14fe4b41a687e51fedb08d58193336a759 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Mon, 16 Dec 2024 14:48:25 +0100 Subject: [PATCH 1/3] Fix formatting of license comment in test_parallel_utilities.cpp --- kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp b/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp index bbebb26b2891..c9cfe35f5a69 100644 --- a/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp +++ b/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp @@ -4,11 +4,12 @@ // _|\_\_| \__,_|\__|\___/ ____/ // Multi-Physics // -// License: BSD License -// Kratos default license: kratos/license.txt +// License: BSD License +// Kratos default license: kratos/license.txt // // Main authors: Riccardo Rossi // Philipp Bucher (https://github.com/philbucher) +// // System includes #include From 34cf34331a8d5175545ac3db272b60ea64b301db Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Mon, 16 Dec 2024 15:41:54 +0100 Subject: [PATCH 2/3] Add benchmarks for parallel utilities in parallel_utilities_benchmark.cpp --- .../parallel_utilities_benchmark.cpp | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 kratos/benchmarks/parallel_utilities_benchmark.cpp diff --git a/kratos/benchmarks/parallel_utilities_benchmark.cpp b/kratos/benchmarks/parallel_utilities_benchmark.cpp new file mode 100644 index 000000000000..c43c4b8d2ad5 --- /dev/null +++ b/kratos/benchmarks/parallel_utilities_benchmark.cpp @@ -0,0 +1,103 @@ +// | / | +// ' / __| _` | __| _ \ __| +// . \ | ( | | ( |\__ ` +// _|\_\_| \__,_|\__|\___/ ____/ +// Multi-Physics +// +// License: BSD License +// Kratos default license: kratos/license.txt +// +// Main authors: Vicente Mataix Ferrandiz +// + +// System includes +#include +#include +#include +#include + +// External includes +#include + +// Project includes +#include "utilities/parallel_utilities.h" +#include "utilities/reduction_utilities.h" + +namespace Kratos +{ +// Template class for testing +template +class RHSElement { +public: + explicit RHSElement(const double Val) : mRHSVal(Val) {} + void CalculateRHS(std::vector& rVector) { + if (rVector.size() != TSize) { rVector.resize(TSize); } + std::fill(rVector.begin(), rVector.end(), mRHSVal); + } + double GetAccumRHSValue() { return mAccumRHSValue; } + void SetAccumRHSValue(double Value) { mAccumRHSValue = Value; } + +private: + double mRHSVal; + double mAccumRHSValue = 0.0; +}; + +// Benchmark for power operation on a vector +static void BM_VectorPower(benchmark::State& state) { + int nsize = state.range(0); + std::vector data_vector(nsize, 5.0); + + for (auto _ : state) { + std::for_each(data_vector.begin(), data_vector.end(), [](double& item) { + item = std::pow(item, 0.1); + }); + } +} + +// Benchmark for reduction +static void BM_VectorReduction(benchmark::State& state) { + int nsize = state.range(0); + const std::vector data_vector(nsize, 5.0); + + for (auto _ : state) { + double final_sum = std::accumulate(data_vector.begin(), data_vector.end(), 0.0); + benchmark::DoNotOptimize(final_sum); + } +} + +// Benchmark for element-wise operations with thread-local storage +static void BM_ThreadLocalStorage(benchmark::State& state) { + constexpr std::size_t vec_size = 6; + std::size_t n_elems = state.range(0); + + using RHSElementType = RHSElement; + + std::vector rhs_vals(n_elems); + for (std::size_t i = 0; i < n_elems; ++i) { + rhs_vals[i] = (i % 12) * 1.889; + } + + std::vector elements; + for (std::size_t i = 0; i < rhs_vals.size(); ++i) { + elements.push_back(RHSElementType(rhs_vals[i])); + } + + std::vector tls(vec_size); + + for (auto _ : state) { + for (auto& elem : elements) { + elem.CalculateRHS(tls); + double sum = std::accumulate(tls.begin(), tls.end(), 0.0); + elem.SetAccumRHSValue(sum); + } + } +} + +// Register benchmarks and provide input size as a command-line option +BENCHMARK(BM_VectorPower)->Arg(1e3)->Arg(1e5)->Arg(1e6); +BENCHMARK(BM_VectorReduction)->Arg(1e3)->Arg(1e5)->Arg(1e6); +BENCHMARK(BM_ThreadLocalStorage)->Arg(1e3)->Arg(1e5)->Arg(1e6); + +} // namespace Kratos + +BENCHMARK_MAIN(); From d09c941a6709d4aaf3aa0c7bb0b37bb22f75fa22 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Tue, 17 Dec 2024 21:01:09 +0100 Subject: [PATCH 3/3] Refactor benchmarks in parallel_utilities_benchmark.cpp to utilize block-based operations for improved performance and clarity --- .../parallel_utilities_benchmark.cpp | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/kratos/benchmarks/parallel_utilities_benchmark.cpp b/kratos/benchmarks/parallel_utilities_benchmark.cpp index c43c4b8d2ad5..963bacba6b21 100644 --- a/kratos/benchmarks/parallel_utilities_benchmark.cpp +++ b/kratos/benchmarks/parallel_utilities_benchmark.cpp @@ -48,7 +48,7 @@ static void BM_VectorPower(benchmark::State& state) { std::vector data_vector(nsize, 5.0); for (auto _ : state) { - std::for_each(data_vector.begin(), data_vector.end(), [](double& item) { + block_for_each(data_vector, [](double& item) { item = std::pow(item, 0.1); }); } @@ -57,11 +57,14 @@ static void BM_VectorPower(benchmark::State& state) { // Benchmark for reduction static void BM_VectorReduction(benchmark::State& state) { int nsize = state.range(0); - const std::vector data_vector(nsize, 5.0); + std::vector data_vector(nsize, 5.0); for (auto _ : state) { - double final_sum = std::accumulate(data_vector.begin(), data_vector.end(), 0.0); - benchmark::DoNotOptimize(final_sum); + auto final_sum = BlockPartition::iterator>(data_vector.begin(), + data_vector.end()).for_each>( + [](double& item){ + return item; + }); } } @@ -82,14 +85,20 @@ static void BM_ThreadLocalStorage(benchmark::State& state) { elements.push_back(RHSElementType(rhs_vals[i])); } - std::vector tls(vec_size); + auto tls_lambda_manual_reduction = [](RHSElementType& rElem, std::vector& rTLS) + { + rElem.CalculateRHS(rTLS); + double rhs_sum = std::accumulate(rTLS.begin(), rTLS.end(), 0.0); + rElem.SetAccumRHSValue(rhs_sum); + }; for (auto _ : state) { - for (auto& elem : elements) { - elem.CalculateRHS(tls); - double sum = std::accumulate(tls.begin(), tls.end(), 0.0); - elem.SetAccumRHSValue(sum); - } + BlockPartition::iterator>(elements.begin(), + elements.end()).for_each(std::vector(), tls_lambda_manual_reduction); + + const double sum_elem_rhs_vals = std::accumulate(elements.begin(), elements.end(), 0.0, [](double acc, RHSElementType& rElem){ + return acc + rElem.GetAccumRHSValue(); + }); } }