From 897cd72d03a2c78f78ed503fe26a681664f175a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Mataix=20Ferr=C3=A1ndiz?= Date: Wed, 11 Dec 2024 10:11:27 +0100 Subject: [PATCH 01/15] [Core] Making explicitly `schedule(dynamic)` by default in OMP loops in `ParallelUtils` --- kratos/utilities/parallel_utilities.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kratos/utilities/parallel_utilities.h b/kratos/utilities/parallel_utilities.h index 0371bdb1e83c..171dcec9d87f 100644 --- a/kratos/utilities/parallel_utilities.h +++ b/kratos/utilities/parallel_utilities.h @@ -183,7 +183,7 @@ class BlockPartition { KRATOS_PREPARE_CATCH_THREAD_EXCEPTION - #pragma omp parallel for + #pragma omp parallel for schedule(dynamic) for (int i=0; i Date: Wed, 11 Dec 2024 12:14:22 +0100 Subject: [PATCH 02/15] Define `PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF` to run OMP loops with dynamic schedule without conflicting the GIL --- kratos/includes/define_python.h | 1 + 1 file changed, 1 insertion(+) diff --git a/kratos/includes/define_python.h b/kratos/includes/define_python.h index 91e3a220496d..00de0f7ef8f4 100644 --- a/kratos/includes/define_python.h +++ b/kratos/includes/define_python.h @@ -16,6 +16,7 @@ // External includes #include +#define PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF // NOTE: This is the only way to run OMP loops with dynamic schedule without conflicting the GIL // Project includes #include "intrusive_ptr/intrusive_ptr.hpp" From 91374a066b074cf79c333d8faa71960cc579481a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Mataix=20Ferr=C3=A1ndiz?= Date: Wed, 11 Dec 2024 13:54:10 +0100 Subject: [PATCH 03/15] Moving to CMake --- CMakeLists.txt | 3 +++ kratos/includes/define_python.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f202808aeb6..5b091837c913 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -294,6 +294,9 @@ include(KratosGTest) include(KratosGBenchmark) include(FetchContent) 
+# This is the only way to run OMP loops with dynamic schedule without conflicting the GIL +add_definitions(-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF) + # Logger configuration if(KRATOS_COLORED_OUTPUT MATCHES ON) add_definitions(-DKRATOS_COLORED_OUTPUT) diff --git a/kratos/includes/define_python.h b/kratos/includes/define_python.h index 00de0f7ef8f4..91e3a220496d 100644 --- a/kratos/includes/define_python.h +++ b/kratos/includes/define_python.h @@ -16,7 +16,6 @@ // External includes #include -#define PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF // NOTE: This is the only way to run OMP loops with dynamic schedule without conflicting the GIL // Project includes #include "intrusive_ptr/intrusive_ptr.hpp" From 83ce3e7f167c264f4344fd8c030c82885252adde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Mataix=20Ferr=C3=A1ndiz?= Date: Thu, 12 Dec 2024 13:04:51 +0100 Subject: [PATCH 04/15] Refactor for runtime --- CMakeLists.txt | 21 +++++++++-- kratos/sources/kernel.cpp | 53 ++++++++++++++++++--------- kratos/utilities/parallel_utilities.h | 8 ++-- 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b091837c913..7ac39d71c009 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -294,9 +294,6 @@ include(KratosGTest) include(KratosGBenchmark) include(FetchContent) -# This is the only way to run OMP loops with dynamic schedule without conflicting the GIL -add_definitions(-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF) - # Logger configuration if(KRATOS_COLORED_OUTPUT MATCHES ON) add_definitions(-DKRATOS_COLORED_OUTPUT) @@ -427,6 +424,24 @@ if (KRATOS_SHARED_MEMORY_PARALLELIZATION STREQUAL "OpenMP") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + + # Check if the environment variable OMP_SCHEDULE is defined + if(DEFINED ENV{OMP_SCHEDULE}) + # Set the already defined one + 
set(KRATOS_OMP_SCHEDULE $ENV{OMP_SCHEDULE}) + else(DEFINED ENV{OMP_SCHEDULE}) + # Add an option for the default KRATOS_OMP_SCHEDULE value + option(KRATOS_OMP_SCHEDULE "Set the default value for KRATOS_OMP_SCHEDULE (e.g., dynamic,4)" "dynamic") + endif((DEFINED ENV{OMP_SCHEDULE})) + + # Display the selected schedule in the build output + message(STATUS "KRATOS_OMP_SCHEDULE is set to: ${KRATOS_OMP_SCHEDULE}") + + # Define the OMP_SCHEDULE as a preprocessor macro + add_definitions(-DKRATOS_OMP_SCHEDULE="${KRATOS_OMP_SCHEDULE}") + + # This is the only way to run OMP loops with dynamic schedule without conflicting the GIL + add_definitions(-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF else (OPENMP_FOUND) message(FATAL_ERROR "OpenMP could not be found!") # fallback solution => in future once better supported we can use the C++11 based parallelization instead diff --git a/kratos/sources/kernel.cpp b/kratos/sources/kernel.cpp index 6bf8a220ff76..d0f74ade2fc7 100644 --- a/kratos/sources/kernel.cpp +++ b/kratos/sources/kernel.cpp @@ -4,8 +4,8 @@ // _|\_\_| \__,_|\__|\___/ ____/ // Multi-Physics // -// License: BSD License -// Kratos default license: kratos/license.txt +// License: BSD License +// Kratos default license: kratos/license.txt // // Main authors: Pooyan Dadvand // @@ -141,33 +141,53 @@ void Kernel::SetPythonVersion(std::string pyVersion) { void Kernel::PrintParallelismSupportInfo() const { - #ifdef KRATOS_SMP_NONE +#ifdef KRATOS_SMP_NONE constexpr bool threading_support = false; - #else + constexpr auto smp = "None"; +#else constexpr bool threading_support = true; + std::string scheduling_str; + #ifdef KRATOS_SMP_OPENMP + // Check if the environment variable is defined + const char* var_name = "OMP_SCHEDULE"; + const char* scheduling = getenv(var_name); + + if (scheduling != nullptr) { // Correct variable name and nullptr comparison + scheduling_str = scheduling; + } else { + #ifdef KRATOS_OMP_SCHEDULE + scheduling_str = KRATOS_OMP_SCHEDULE; // Use the 
preprocessor-defined value + #else + scheduling_str = "dynamic"; // NOTE: This should not happen as defined in compiling time #endif + } - #ifdef KRATOS_USING_MPI - constexpr bool mpi_support = true; + const auto smp = "OpenMP, scheduling " + scheduling_str; // Use `std::string` for concatenation + #elif defined(KRATOS_SMP_CXX11) + constexpr auto smp = "C++11"; #else - constexpr bool mpi_support = false; + constexpr auto smp = "Unknown"; #endif +#endif + +#ifdef KRATOS_USING_MPI + constexpr bool mpi_support = true; +#else + constexpr bool mpi_support = false; +#endif Logger logger(""); logger << LoggerMessage::Severity::INFO; if (threading_support) { if (mpi_support) { - logger << "Compiled with threading and MPI support." << std::endl; - } - else { - logger << "Compiled with threading support." << std::endl; + logger << "Compiled with threading and MPI support. Threading support with " << smp << "." << std::endl; + } else { + logger << "Compiled with threading support. Threading support with " << smp << "." << std::endl; } - } - else if (mpi_support) { + } else if (mpi_support) { logger << "Compiled with MPI support." << std::endl; - } - else { + } else { logger << "Serial compilation." << std::endl; } @@ -179,8 +199,7 @@ void Kernel::PrintParallelismSupportInfo() const if (mIsDistributedRun) { const DataCommunicator& r_world = ParallelEnvironment::GetDataCommunicator("World"); logger << "MPI world size: " << r_world.Size() << "." << std::endl; - } - else { + } else { logger << "Running without MPI." 
<< std::endl; } } diff --git a/kratos/utilities/parallel_utilities.h b/kratos/utilities/parallel_utilities.h index 171dcec9d87f..afbf39d45031 100644 --- a/kratos/utilities/parallel_utilities.h +++ b/kratos/utilities/parallel_utilities.h @@ -183,7 +183,7 @@ class BlockPartition { KRATOS_PREPARE_CATCH_THREAD_EXCEPTION - #pragma omp parallel for schedule(dynamic) + #pragma omp parallel for schedule(runtime) for (int i=0; i Date: Thu, 12 Dec 2024 14:06:00 +0100 Subject: [PATCH 05/15] Missing ) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ac39d71c009..53030038d882 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -441,7 +441,7 @@ if (KRATOS_SHARED_MEMORY_PARALLELIZATION STREQUAL "OpenMP") add_definitions(-DKRATOS_OMP_SCHEDULE="${KRATOS_OMP_SCHEDULE}") # This is the only way to run OMP loops with dynamic schedule without conflicting the GIL - add_definitions(-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF + add_definitions(-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF) else (OPENMP_FOUND) message(FATAL_ERROR "OpenMP could not be found!") # fallback solution => in future once better supported we can use the C++11 based parallelization instead From 7ece75ab5c94af7145c5024c32746e728a4c4ce8 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Thu, 12 Dec 2024 18:06:13 +0100 Subject: [PATCH 06/15] Fix preprocessor directive formatting in Kernel::PrintParallelismSupportInfo --- kratos/sources/kernel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kratos/sources/kernel.cpp b/kratos/sources/kernel.cpp index d0f74ade2fc7..14a61f395807 100644 --- a/kratos/sources/kernel.cpp +++ b/kratos/sources/kernel.cpp @@ -147,7 +147,7 @@ void Kernel::PrintParallelismSupportInfo() const #else constexpr bool threading_support = true; std::string scheduling_str; - #ifdef KRATOS_SMP_OPENMP + #if defined(KRATOS_SMP_OPENMP) // Check if the environment variable is defined const char* 
var_name = "OMP_SCHEDULE"; const char* scheduling = getenv(var_name); @@ -155,11 +155,11 @@ void Kernel::PrintParallelismSupportInfo() const if (scheduling != nullptr) { // Correct variable name and nullptr comparison scheduling_str = scheduling; } else { - #ifdef KRATOS_OMP_SCHEDULE + #ifdef KRATOS_OMP_SCHEDULE scheduling_str = KRATOS_OMP_SCHEDULE; // Use the preprocessor-defined value - #else + #else scheduling_str = "dynamic"; // NOTE: This should not happen as defined in compiling time - #endif + #endif } const auto smp = "OpenMP, scheduling " + scheduling_str; // Use `std::string` for concatenation From 8ba1ab56a6a398d0669ca22a043c975ffd4ae808 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Thu, 12 Dec 2024 18:06:21 +0100 Subject: [PATCH 07/15] Set default value for KRATOS_OMP_SCHEDULE if not defined in CMake --- CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 53030038d882..d0ef2b9a5591 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -430,8 +430,11 @@ if (KRATOS_SHARED_MEMORY_PARALLELIZATION STREQUAL "OpenMP") # Set the already defined one set(KRATOS_OMP_SCHEDULE $ENV{OMP_SCHEDULE}) else(DEFINED ENV{OMP_SCHEDULE}) - # Add an option for the default KRATOS_OMP_SCHEDULE value - option(KRATOS_OMP_SCHEDULE "Set the default value for KRATOS_OMP_SCHEDULE (e.g., dynamic,4)" "dynamic") + # If not defined set the default value + if(NOT DEFINED KRATOS_OMP_SCHEDULE) + message(STATUS "OMP_SCHEDULE is not defined, setting to dynamic. 
You can also set it with the environment variable OMP_SCHEDULE or with the CMake variable KRATOS_OMP_SCHEDULE (e.g., dynamic,4)") + set(KRATOS_OMP_SCHEDULE "dynamic") + endif(NOT DEFINED KRATOS_OMP_SCHEDULE) endif((DEFINED ENV{OMP_SCHEDULE})) # Display the selected schedule in the build output From 5503861fb89a585f40263aa26cff5b840a65703c Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Thu, 12 Dec 2024 21:49:13 +0100 Subject: [PATCH 08/15] Enhance Kernel::PrintParallelismSupportInfo to set and log environment variable for scheduling type --- kratos/sources/kernel.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kratos/sources/kernel.cpp b/kratos/sources/kernel.cpp index 14a61f395807..d38ec38ceed1 100644 --- a/kratos/sources/kernel.cpp +++ b/kratos/sources/kernel.cpp @@ -160,9 +160,14 @@ void Kernel::PrintParallelismSupportInfo() const #else scheduling_str = "dynamic"; // NOTE: This should not happen as defined in compiling time #endif + const int overwrite = 1; // Overwrite if it exists, a priori not, that's why we are setting it + const int output_setenv = setenv(var_name, scheduling_str.c_str(), overwrite); + KRATOS_ERROR_IF_NOT(output_setenv == 0) << "Error setting environment variable " << var_name << std::endl; + scheduling_str = "\"" + scheduling_str + "\""; + scheduling_str += " (retrieving from KRATOS_OMP_SCHEDULE)"; } - const auto smp = "OpenMP, scheduling " + scheduling_str; // Use `std::string` for concatenation + const auto smp = "OpenMP, scheduling type is " + scheduling_str; // Use `std::string` for concatenation #elif defined(KRATOS_SMP_CXX11) constexpr auto smp = "C++11"; #else From 67c4c6f528bb694814faa9fd4195710c71cae4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Mataix=20Ferr=C3=A1ndiz?= Date: Thu, 12 Dec 2024 23:59:05 +0100 Subject: [PATCH 09/15] Code for Windows --- kratos/sources/kernel.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kratos/sources/kernel.cpp 
b/kratos/sources/kernel.cpp index d38ec38ceed1..1dd8661e45e3 100644 --- a/kratos/sources/kernel.cpp +++ b/kratos/sources/kernel.cpp @@ -160,9 +160,19 @@ void Kernel::PrintParallelismSupportInfo() const #else scheduling_str = "dynamic"; // NOTE: This should not happen as defined in compiling time #endif + #ifdef KRATOS_COMPILED_IN_WINDOWS + #else const int overwrite = 1; // Overwrite if it exists, a priori not, that's why we are setting it const int output_setenv = setenv(var_name, scheduling_str.c_str(), overwrite); KRATOS_ERROR_IF_NOT(output_setenv == 0) << "Error setting environment variable " << var_name << std::endl; + #endif + #ifdef KRATOS_COMPILED_IN_WINDOWS + const int output_setenv = _putenv_s(var_name, scheduling_str.c_str()); + #else + const int overwrite = 1; // Overwrite if it exists, a priori not, that's why we are setting it + const int output_setenv = setenv(var_name, scheduling_str.c_str(), overwrite); + #endif + KRATOS_ERROR_IF_NOT(output_setenv == 0) << "Error setting environment variable " << var_name << std::endl; scheduling_str = "\"" + scheduling_str + "\""; scheduling_str += " (retrieving from KRATOS_OMP_SCHEDULE)"; } From ad3f34577b3a0c71836a81993b0035a6d1c906c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Mataix=20Ferr=C3=A1ndiz?= Date: Fri, 13 Dec 2024 09:10:57 +0100 Subject: [PATCH 10/15] Duplicated --- kratos/sources/kernel.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kratos/sources/kernel.cpp b/kratos/sources/kernel.cpp index 1dd8661e45e3..ecc7ba1a4e31 100644 --- a/kratos/sources/kernel.cpp +++ b/kratos/sources/kernel.cpp @@ -160,12 +160,6 @@ void Kernel::PrintParallelismSupportInfo() const #else scheduling_str = "dynamic"; // NOTE: This should not happen as defined in compiling time #endif - #ifdef KRATOS_COMPILED_IN_WINDOWS - #else - const int overwrite = 1; // Overwrite if it exists, a priori not, that's why we are setting it - const int output_setenv = setenv(var_name, scheduling_str.c_str(), overwrite); - 
KRATOS_ERROR_IF_NOT(output_setenv == 0) << "Error setting environment variable " << var_name << std::endl; - #endif #ifdef KRATOS_COMPILED_IN_WINDOWS const int output_setenv = _putenv_s(var_name, scheduling_str.c_str()); #else From ddfdca61e15c4796492eace48f2ecd193518fed9 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Mon, 16 Dec 2024 14:48:25 +0100 Subject: [PATCH 11/15] Fix formatting of license comment in test_parallel_utilities.cpp --- kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp b/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp index bbebb26b2891..c9cfe35f5a69 100644 --- a/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp +++ b/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp @@ -4,11 +4,12 @@ // _|\_\_| \__,_|\__|\___/ ____/ // Multi-Physics // -// License: BSD License -// Kratos default license: kratos/license.txt +// License: BSD License +// Kratos default license: kratos/license.txt // // Main authors: Riccardo Rossi // Philipp Bucher (https://github.com/philbucher) +// // System includes #include From 9c9501853d22c8314aa2d16ab4f59dd830f42f99 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Mon, 16 Dec 2024 15:41:54 +0100 Subject: [PATCH 12/15] Add benchmarks for parallel utilities in parallel_utilities_benchmark.cpp --- .../parallel_utilities_benchmark.cpp | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 kratos/benchmarks/parallel_utilities_benchmark.cpp diff --git a/kratos/benchmarks/parallel_utilities_benchmark.cpp b/kratos/benchmarks/parallel_utilities_benchmark.cpp new file mode 100644 index 000000000000..c43c4b8d2ad5 --- /dev/null +++ b/kratos/benchmarks/parallel_utilities_benchmark.cpp @@ -0,0 +1,103 @@ +// | / | +// ' / __| _` | __| _ \ __| +// . 
\ | ( | | ( |\__ ` +// _|\_\_| \__,_|\__|\___/ ____/ +// Multi-Physics +// +// License: BSD License +// Kratos default license: kratos/license.txt +// +// Main authors: Vicente Mataix Ferrandiz +// + +// System includes +#include +#include +#include +#include + +// External includes +#include + +// Project includes +#include "utilities/parallel_utilities.h" +#include "utilities/reduction_utilities.h" + +namespace Kratos +{ +// Template class for testing +template +class RHSElement { +public: + explicit RHSElement(const double Val) : mRHSVal(Val) {} + void CalculateRHS(std::vector& rVector) { + if (rVector.size() != TSize) { rVector.resize(TSize); } + std::fill(rVector.begin(), rVector.end(), mRHSVal); + } + double GetAccumRHSValue() { return mAccumRHSValue; } + void SetAccumRHSValue(double Value) { mAccumRHSValue = Value; } + +private: + double mRHSVal; + double mAccumRHSValue = 0.0; +}; + +// Benchmark for power operation on a vector +static void BM_VectorPower(benchmark::State& state) { + int nsize = state.range(0); + std::vector data_vector(nsize, 5.0); + + for (auto _ : state) { + std::for_each(data_vector.begin(), data_vector.end(), [](double& item) { + item = std::pow(item, 0.1); + }); + } +} + +// Benchmark for reduction +static void BM_VectorReduction(benchmark::State& state) { + int nsize = state.range(0); + const std::vector data_vector(nsize, 5.0); + + for (auto _ : state) { + double final_sum = std::accumulate(data_vector.begin(), data_vector.end(), 0.0); + benchmark::DoNotOptimize(final_sum); + } +} + +// Benchmark for element-wise operations with thread-local storage +static void BM_ThreadLocalStorage(benchmark::State& state) { + constexpr std::size_t vec_size = 6; + std::size_t n_elems = state.range(0); + + using RHSElementType = RHSElement; + + std::vector rhs_vals(n_elems); + for (std::size_t i = 0; i < n_elems; ++i) { + rhs_vals[i] = (i % 12) * 1.889; + } + + std::vector elements; + for (std::size_t i = 0; i < rhs_vals.size(); ++i) { + 
elements.push_back(RHSElementType(rhs_vals[i])); + } + + std::vector tls(vec_size); + + for (auto _ : state) { + for (auto& elem : elements) { + elem.CalculateRHS(tls); + double sum = std::accumulate(tls.begin(), tls.end(), 0.0); + elem.SetAccumRHSValue(sum); + } + } +} + +// Register benchmarks and provide input size as a command-line option +BENCHMARK(BM_VectorPower)->Arg(1e3)->Arg(1e5)->Arg(1e6); +BENCHMARK(BM_VectorReduction)->Arg(1e3)->Arg(1e5)->Arg(1e6); +BENCHMARK(BM_ThreadLocalStorage)->Arg(1e3)->Arg(1e5)->Arg(1e6); + +} // namespace Kratos + +BENCHMARK_MAIN(); From bc5074e8b79238f4514b188e48bd634c61c45dab Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Tue, 17 Dec 2024 17:06:08 +0100 Subject: [PATCH 13/15] Add authorship in parallel_utilities.h --- kratos/utilities/parallel_utilities.h | 1 + 1 file changed, 1 insertion(+) diff --git a/kratos/utilities/parallel_utilities.h b/kratos/utilities/parallel_utilities.h index afbf39d45031..ea8160db755a 100644 --- a/kratos/utilities/parallel_utilities.h +++ b/kratos/utilities/parallel_utilities.h @@ -10,6 +10,7 @@ // Main authors: Riccardo Rossi // Denis Demidov // Philipp Bucher (https://github.com/philbucher) +// Vicente Mataix Ferrandiz // #pragma once From 606882b325fe3fdae9399849977112babd6e3556 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Tue, 17 Dec 2024 21:01:09 +0100 Subject: [PATCH 14/15] Refactor benchmarks in parallel_utilities_benchmark.cpp to utilize block-based operations for improved performance and clarity --- .../parallel_utilities_benchmark.cpp | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/kratos/benchmarks/parallel_utilities_benchmark.cpp b/kratos/benchmarks/parallel_utilities_benchmark.cpp index c43c4b8d2ad5..963bacba6b21 100644 --- a/kratos/benchmarks/parallel_utilities_benchmark.cpp +++ b/kratos/benchmarks/parallel_utilities_benchmark.cpp @@ -48,7 +48,7 @@ static void BM_VectorPower(benchmark::State& state) { std::vector 
data_vector(nsize, 5.0); for (auto _ : state) { - std::for_each(data_vector.begin(), data_vector.end(), [](double& item) { + block_for_each(data_vector, [](double& item) { item = std::pow(item, 0.1); }); } @@ -57,11 +57,14 @@ static void BM_VectorPower(benchmark::State& state) { // Benchmark for reduction static void BM_VectorReduction(benchmark::State& state) { int nsize = state.range(0); - const std::vector data_vector(nsize, 5.0); + std::vector data_vector(nsize, 5.0); for (auto _ : state) { - double final_sum = std::accumulate(data_vector.begin(), data_vector.end(), 0.0); - benchmark::DoNotOptimize(final_sum); + auto final_sum = BlockPartition::iterator>(data_vector.begin(), + data_vector.end()).for_each>( + [](double& item){ + return item; + }); } } @@ -82,14 +85,20 @@ static void BM_ThreadLocalStorage(benchmark::State& state) { elements.push_back(RHSElementType(rhs_vals[i])); } - std::vector tls(vec_size); + auto tls_lambda_manual_reduction = [](RHSElementType& rElem, std::vector& rTLS) + { + rElem.CalculateRHS(rTLS); + double rhs_sum = std::accumulate(rTLS.begin(), rTLS.end(), 0.0); + rElem.SetAccumRHSValue(rhs_sum); + }; for (auto _ : state) { - for (auto& elem : elements) { - elem.CalculateRHS(tls); - double sum = std::accumulate(tls.begin(), tls.end(), 0.0); - elem.SetAccumRHSValue(sum); - } + BlockPartition::iterator>(elements.begin(), + elements.end()).for_each(std::vector(), tls_lambda_manual_reduction); + + const double sum_elem_rhs_vals = std::accumulate(elements.begin(), elements.end(), 0.0, [](double acc, RHSElementType& rElem){ + return acc + rElem.GetAccumRHSValue(); + }); } } From 24f40aa7b7a5c1db80b90136df64ec735db66436 Mon Sep 17 00:00:00 2001 From: Vicente Mataix Ferrandiz Date: Wed, 18 Dec 2024 10:28:59 +0100 Subject: [PATCH 15/15] Update OpenMP scheduling in parallel_utilities.h to use runtime scheduling for improved performance --- kratos/utilities/parallel_utilities.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/kratos/utilities/parallel_utilities.h b/kratos/utilities/parallel_utilities.h index ea8160db755a..b3a0d649d6ab 100644 --- a/kratos/utilities/parallel_utilities.h +++ b/kratos/utilities/parallel_utilities.h @@ -239,7 +239,7 @@ class BlockPartition // copy the prototype to create the thread local storage TThreadLocalStorage thread_local_storage(rThreadLocalStoragePrototype); - #pragma omp for + #pragma omp for schedule(runtime) for(int i=0; i