diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8d56ede3c1a7..8cff69f4a315 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,6 +427,27 @@ if (KRATOS_SHARED_MEMORY_PARALLELIZATION STREQUAL "OpenMP")
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
         set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+
+        # An OMP_SCHEDULE present in the configuring environment takes precedence over everything else
+        if(DEFINED ENV{OMP_SCHEDULE})
+            # Quoted so that an empty or list-like value is still stored as one single string
+            set(KRATOS_OMP_SCHEDULE "$ENV{OMP_SCHEDULE}")
+        else(DEFINED ENV{OMP_SCHEDULE})
+            # Otherwise keep a user-provided -DKRATOS_OMP_SCHEDULE=..., or fall back to the default value
+            if(NOT DEFINED KRATOS_OMP_SCHEDULE)
+                message(STATUS "OMP_SCHEDULE is not defined, setting to dynamic. You can also set it with the environment variable OMP_SCHEDULE or with the CMake variable KRATOS_OMP_SCHEDULE (e.g., dynamic,4)")
+                set(KRATOS_OMP_SCHEDULE "dynamic")
+            endif(NOT DEFINED KRATOS_OMP_SCHEDULE)
+        endif(DEFINED ENV{OMP_SCHEDULE})
+
+        # Display the selected schedule in the build output
+        message(STATUS "KRATOS_OMP_SCHEDULE is set to: ${KRATOS_OMP_SCHEDULE}")
+
+        # Expose the selected schedule to the C++ sources as a string-literal preprocessor macro
+        add_definitions(-DKRATOS_OMP_SCHEDULE="${KRATOS_OMP_SCHEDULE}")
+
+        # This is the only way to run OMP loops with dynamic schedule without conflicting the GIL
+        add_definitions(-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF)
     else (OPENMP_FOUND)
         message(FATAL_ERROR "OpenMP could not be found!")
         # fallback solution => in future once better supported we can use the C++11 based parallelization instead
diff --git a/kratos/benchmarks/parallel_utilities_benchmark.cpp b/kratos/benchmarks/parallel_utilities_benchmark.cpp
new file mode 100644
index 000000000000..963bacba6b21
--- /dev/null
+++ b/kratos/benchmarks/parallel_utilities_benchmark.cpp
@@ -0,0 +1,112 @@
+//    |  /           |
+//    ' /   __| _` | __|  _ \   __|
+//    . 
\  |   (   | |   (   |\__ `
+//   _|\_\_|  \__,_|\__|\___/ ____/
+//                   Multi-Physics
+//
+//  License:         BSD License
+//                   Kratos default license: kratos/license.txt
+//
+//  Main authors:    Vicente Mataix Ferrandiz
+//
+
+// System includes
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+#include <vector>
+
+// External includes
+#include <benchmark/benchmark.h>
+
+// Project includes
+#include "utilities/parallel_utilities.h"
+#include "utilities/reduction_utilities.h"
+
+namespace Kratos
+{
+// Minimal element stand-in for the benchmarks: fills a TSize-long RHS vector with one constant value
+template<std::size_t TSize>
+class RHSElement {
+public:
+    explicit RHSElement(const double Val) : mRHSVal(Val) {}
+    void CalculateRHS(std::vector<double>& rVector) {
+        if (rVector.size() != TSize) { rVector.resize(TSize); }
+        std::fill(rVector.begin(), rVector.end(), mRHSVal);
+    }
+    double GetAccumRHSValue() const { return mAccumRHSValue; }
+    void SetAccumRHSValue(double Value) { mAccumRHSValue = Value; }
+
+private:
+    double mRHSVal;
+    double mAccumRHSValue = 0.0;
+};
+
+// Benchmark for power operation on a vector
+static void BM_VectorPower(benchmark::State& state) {
+    const auto nsize = static_cast<std::size_t>(state.range(0)); // range() is 64-bit, avoid narrowing to int
+    std::vector<double> data_vector(nsize, 5.0);
+
+    for (auto _ : state) {
+        block_for_each(data_vector, [](double& item) {
+            item = std::pow(item, 0.1);
+        });
+    }
+}
+
+// Benchmark for reduction
+static void BM_VectorReduction(benchmark::State& state) {
+    const auto nsize = static_cast<std::size_t>(state.range(0)); // range() is 64-bit, avoid narrowing to int
+    std::vector<double> data_vector(nsize, 5.0);
+
+    for (auto _ : state) {
+        auto final_sum = BlockPartition<std::vector<double>::iterator>(data_vector.begin(),
+                         data_vector.end()).for_each<SumReduction<double>>(
+            [](double& item){ return item; });
+        // Keep the reduction result alive so the optimizer cannot drop the work being timed
+        benchmark::DoNotOptimize(final_sum);
+    }
+}
+
+// Benchmark for element-wise operations with thread-local storage
+static void BM_ThreadLocalStorage(benchmark::State& state) {
+    constexpr std::size_t vec_size = 6;
+    const auto n_elems = static_cast<std::size_t>(state.range(0));
+
+    using RHSElementType = RHSElement<vec_size>;
+
+    std::vector<double> rhs_vals(n_elems);
+    for (std::size_t i = 0; i < n_elems; ++i) {
+        rhs_vals[i] = (i % 12) * 1.889;
+    }
+
+    std::vector<RHSElementType> elements;
+    elements.reserve(rhs_vals.size()); // Single allocation instead of repeated growth
+    for (const double rhs_val : rhs_vals) {
+        elements.emplace_back(rhs_val);
+    }
+
+    auto tls_lambda_manual_reduction = [](RHSElementType& rElem, std::vector<double>& rTLS) {
+        rElem.CalculateRHS(rTLS);
+        const double rhs_sum = std::accumulate(rTLS.begin(), rTLS.end(), 0.0);
+        rElem.SetAccumRHSValue(rhs_sum);
+    };
+
+    for (auto _ : state) {
+        BlockPartition<std::vector<RHSElementType>::iterator>(elements.begin(),
+            elements.end()).for_each(std::vector<double>(), tls_lambda_manual_reduction);
+
+        double sum_elem_rhs_vals = std::accumulate(elements.begin(), elements.end(), 0.0,
+            [](double acc, const RHSElementType& rElem){ return acc + rElem.GetAccumRHSValue(); });
+        benchmark::DoNotOptimize(sum_elem_rhs_vals); // Result is otherwise unused and could be elided
+    }
+}
+
+// Register the benchmarks, each one run with fixed input sizes of 1e3, 1e5 and 1e6 entries
+BENCHMARK(BM_VectorPower)->Arg(1e3)->Arg(1e5)->Arg(1e6);
+BENCHMARK(BM_VectorReduction)->Arg(1e3)->Arg(1e5)->Arg(1e6);
+BENCHMARK(BM_ThreadLocalStorage)->Arg(1e3)->Arg(1e5)->Arg(1e6);
+
+} // namespace Kratos
+
+BENCHMARK_MAIN();
diff --git a/kratos/sources/kernel.cpp b/kratos/sources/kernel.cpp
index 326cf077e3fb..98c5c7774fb6 100644
--- a/kratos/sources/kernel.cpp
+++ b/kratos/sources/kernel.cpp
@@ -211,7 +211,31 @@ void Kernel::PrintParallelismSupportInfo() const
     constexpr bool threading_support = true;
     std::string scheduling_str;
     #if defined(KRATOS_SMP_OPENMP)
-        const auto smp = "OpenMP";
+        // Check if the environment variable is defined (it takes precedence over the compiled-in default)
+        const char* var_name = "OMP_SCHEDULE";
+        const char* scheduling = getenv(var_name);
+
+        if (scheduling != nullptr) {
+            scheduling_str = scheduling;
+        } else {
+            #ifdef KRATOS_OMP_SCHEDULE
+                scheduling_str = KRATOS_OMP_SCHEDULE; // Use the preprocessor-defined value
+            #else
+                scheduling_str = "dynamic"; // NOTE: This should not happen as defined in compiling time
+            #endif
+            // NOTE(review): OpenMP runtimes may ignore environment changes made after start-up; confirm this export is honoured or call omp_set_schedule() instead
+            #ifdef KRATOS_COMPILED_IN_WINDOWS
+                const int output_setenv = _putenv_s(var_name, scheduling_str.c_str());
+            #else
+                const int overwrite = 1; // Overwrite if it exists, a priori not, that's why we are setting it
+                const int output_setenv = setenv(var_name, scheduling_str.c_str(), overwrite);
+            #endif
+            KRATOS_ERROR_IF_NOT(output_setenv == 0) << "Error setting environment variable " << var_name << std::endl;
+            scheduling_str = "\"" + scheduling_str + "\"";
+            scheduling_str += " (retrieving from KRATOS_OMP_SCHEDULE)";
+        }
+
+        const auto smp = "OpenMP, scheduling type is " + scheduling_str;
     #elif defined(KRATOS_SMP_CXX11)
         constexpr auto smp = "C++11";
     #else
diff --git a/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp b/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp
index bbebb26b2891..c9cfe35f5a69 100644
--- a/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp
+++ b/kratos/tests/cpp_tests/utilities/test_parallel_utilities.cpp
@@ -4,11 +4,12 @@
 //   _|\_\_|  \__,_|\__|\___/ ____/
 //                   Multi-Physics
 //
-//  License:		 BSD License
-//					 Kratos default license: kratos/license.txt
+//  License:         BSD License
+//                   Kratos default license: kratos/license.txt
 //
 //  Main authors:    Riccardo Rossi
 //                   Philipp Bucher (https://github.com/philbucher)
+//
 
 // System includes
 #include
diff --git a/kratos/utilities/parallel_utilities.h b/kratos/utilities/parallel_utilities.h
index 0371bdb1e83c..b3a0d649d6ab 100644
--- a/kratos/utilities/parallel_utilities.h
+++ b/kratos/utilities/parallel_utilities.h
@@ -10,6 +10,7 @@
 //  Main authors:    Riccardo Rossi
 //                   Denis Demidov
 //                   Philipp Bucher (https://github.com/philbucher)
+//                   Vicente Mataix Ferrandiz
 //
 
 #pragma once
@@ -183,7 +184,7 @@ class BlockPartition
     {
         KRATOS_PREPARE_CATCH_THREAD_EXCEPTION
 
-        #pragma omp parallel for
+        #pragma omp parallel for schedule(runtime)
         for (int i=0; i