diff --git a/demos/CMakeLists.txt b/demos/CMakeLists.txt index 886c62108..c4dc0d812 100644 --- a/demos/CMakeLists.txt +++ b/demos/CMakeLists.txt @@ -3,8 +3,9 @@ find_package(CLI11) add_subdirectory(from_obj) add_subdirectory(FiniteVolume) +add_subdirectory(loadbalancing) # add_subdirectory(MPI) -# add_subdirectory(LBM) +add_subdirectory(LBM) add_subdirectory(p4est) add_subdirectory(pablo) add_subdirectory(tutorial) diff --git a/demos/FiniteVolume/CMakeLists.txt b/demos/FiniteVolume/CMakeLists.txt index e1d8ba6c0..a5f41a5b8 100644 --- a/demos/FiniteVolume/CMakeLists.txt +++ b/demos/FiniteVolume/CMakeLists.txt @@ -27,6 +27,10 @@ if(PETSC_FOUND) target_link_libraries(manual_block_matrix_assembly samurai CLI11::CLI11 ${PETSC_LINK_LIBRARIES} ${MPI_LIBRARIES}) endif() +add_executable(finite-volume-linear-convection linear_convection.cpp) +target_link_libraries(finite-volume-linear-convection samurai CLI11::CLI11 ${MPI_LIBRARIES}) + + add_executable(finite-volume-amr-burgers-hat AMR_Burgers_Hat.cpp) target_link_libraries(finite-volume-amr-burgers-hat samurai CLI11::CLI11) @@ -53,8 +57,6 @@ target_link_libraries(finite-volume-advection-2d-user-bc samurai CLI11::CLI11) add_executable(finite-volume-scalar-burgers-2d scalar_burgers_2d.cpp) target_link_libraries(finite-volume-scalar-burgers-2d samurai CLI11::CLI11) -add_executable(finite-volume-linear-convection linear_convection.cpp) -target_link_libraries(finite-volume-linear-convection samurai CLI11::CLI11) add_executable(finite-volume-burgers burgers.cpp) target_link_libraries(finite-volume-burgers samurai CLI11::CLI11) diff --git a/demos/FiniteVolume/advection_2d.cpp b/demos/FiniteVolume/advection_2d.cpp index 4b6c85d0f..76fa6a5f3 100644 --- a/demos/FiniteVolume/advection_2d.cpp +++ b/demos/FiniteVolume/advection_2d.cpp @@ -15,6 +15,16 @@ #include #include +#include +#include +#include +#include +#include +#include +#include + +#include + #include namespace fs = std::filesystem; @@ -23,11 +33,13 @@ auto init(Mesh& mesh) { auto u = samurai::make_field("u", mesh); + u.fill( 0. ); + samurai::for_each_cell( mesh, [&](auto& cell) { - auto center = cell.center(); + auto center = cell.center(); const double radius = .2; const double x_center = 0.3; const double y_center = 0.3; @@ -38,7 +50,7 @@ auto init(Mesh& mesh) else { u[cell] = 0; - } + } }); return u; @@ -146,21 +158,26 @@ void flux_correction(double dt, const std::array& a, const Field& u, template void save(const fs::path& path, const std::string& filename, const Field& u, const std::string& suffix = "") { - auto mesh = u.mesh(); - auto level_ = samurai::make_field("level", mesh); + auto mesh = u.mesh(); + auto level_ = samurai::make_field("level", mesh); + auto domain_ = samurai::make_field("domain", mesh); if (!fs::exists(path)) { fs::create_directory(path); } + int mrank = 0; + samurai::for_each_cell(mesh, [&](const auto& cell) { - level_[cell] = cell.level; + level_[cell] = cell.level; + domain_[cell] = mrank; }); #ifdef SAMURAI_WITH_MPI mpi::communicator world; + mrank = world.rank(); samurai::save(path, fmt::format("{}_size_{}{}", filename, world.size(), suffix), mesh, u, level_); #else samurai::save(path, fmt::format("{}{}", filename, suffix), mesh, u, level_); @@ -175,6 +192,7 @@ int main(int argc, char* argv[]) using Config = samurai::MRConfig; // Simulation parameters + double radius = 0.2, x_center = 0.3, y_center = 0.3; xt::xtensor_fixed> min_corner = {0., 0.}; xt::xtensor_fixed> max_corner = {1., 1.}; std::array a{ @@ -194,6 +212,7 @@ int main(int argc, char* argv[]) fs::path path = fs::current_path(); std::string filename = "FV_advection_2d"; std::size_t nfiles = 1; + std::size_t nt_loadbalance = 1; // nombre d'iteration entre les equilibrages app.add_option("--min-corner", min_corner, "The min corner of the box")->capture_default_str()->group("Simulation parameters"); app.add_option("--max-corner", max_corner, "The max corner of the box")->capture_default_str()->group("Simulation parameters"); @@ -202,6 +221,7 @@ int main(int argc, char* argv[]) app.add_option("--Tf", Tf, "Final time")->capture_default_str()->group("Simulation parameters"); app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") ->capture_default_str() ->group("Multiresolution"); @@ -227,19 +247,57 @@ int main(int argc, char* argv[]) double t = 0.; auto u = init(mesh); + + samurai::make_bc>(u, 0.); auto unp1 = samurai::make_field("unp1", mesh); auto MRadaptation = samurai::make_MRAdapt(u); + + samurai::times::timers.start("MRadaptation"); MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("MRadaptation"); + save(path, filename, u, "_init"); std::size_t nsave = 1; std::size_t nt = 0; + +// For now, void_balancer is verified and works properly +// Diffusion_LoadBalancer_cell not exist ??? +// Load_balancing::Diffusion donne de très mauvais resultats, peut-etre des parametres internes ? + + SFC_LoadBalancer_interval balancer; +// Void_LoadBalancer balancer; +// Diffusion_LoadBalancer_cell balancer; +// Diffusion_LoadBalancer_interval balancer; +// Load_balancing::Diffusion balancer; + // Load_balancing::SFCw balancer; + + std::ofstream logs; +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app ); +#endif while (t != Tf) { + bool reqBalance = balancer.require_balance( mesh ); + + if( reqBalance ) std::cerr << "\t> Load Balancing required !!! " << std::endl; + + // if ( ( nt % nt_loadbalance == 0 || reqBalance ) && nt > 1 ) + if ( ( nt % nt_loadbalance == 0 ) && nt > 1 ) + // if ( reqBalance && nt > 1 ) + { + samurai::times::timers.start("load-balancing"); + balancer.load_balance(mesh, u); + samurai::times::timers.stop("load-balancing"); + } + + samurai::times::timers.start("MRadaptation"); MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("MRadaptation"); t += dt; if (t > Tf) @@ -251,8 +309,13 @@ int main(int argc, char* argv[]) std::cout << fmt::format("iteration {}: t = {}, dt = {}", nt++, t, dt) << std::endl; samurai::update_ghost_mr(u); + unp1.resize(); + + samurai::times::timers.start("upwind"); unp1 = u - dt * samurai::upwind(a, u); + samurai::times::timers.stop("upwind"); + if (correction) { flux_correction(dt, a, u, unp1); @@ -265,7 +328,9 @@ int main(int argc, char* argv[]) const std::string suffix = (nfiles != 1) ? fmt::format("_ite_{}", nsave++) : ""; save(path, filename, u, suffix); } + } + samurai::finalize(); return 0; } diff --git a/demos/FiniteVolume/linear_convection.cpp b/demos/FiniteVolume/linear_convection.cpp index 910739f03..1c3b4e380 100644 --- a/demos/FiniteVolume/linear_convection.cpp +++ b/demos/FiniteVolume/linear_convection.cpp @@ -7,6 +7,20 @@ #include #include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef WITH_STATS +#include "samurai/statistics.hpp" +#endif + #include namespace fs = std::filesystem; @@ -44,7 +58,9 @@ int main(int argc, char* argv[]) //--------------------// // Program parameters // //--------------------// - +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; +#endif // Simulation parameters double left_box = -1; double right_box = 1; @@ -64,6 +80,7 @@ int main(int argc, char* argv[]) fs::path path = fs::current_path(); std::string filename = "linear_convection_" + std::to_string(dim) + "D"; std::size_t nfiles = 0; + std::size_t nt_loadbalance = 10; app.add_option("--left", left_box, "The left border of the box")->capture_default_str()->group("Simulation parameters"); app.add_option("--right", right_box, "The right border of the box")->capture_default_str()->group("Simulation parameters"); @@ -72,6 +89,7 @@ int main(int argc, char* argv[]) app.add_option("--cfl", cfl, "The CFL")->capture_default_str()->group("Simulation parameters"); app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") ->capture_default_str() ->group("Multiresolution"); @@ -96,31 +114,40 @@ int main(int argc, char* argv[]) box_corner2.fill(right_box); Box box(box_corner1, box_corner2); std::array periodic; - periodic.fill(true); + periodic.fill(false); + + samurai::times::timers.start("init"); samurai::MRMesh mesh{box, min_level, max_level, periodic}; // Initial solution - auto u = samurai::make_field<1>("u", + auto u = samurai::make_field("u", mesh, - [](const auto& coords) + [&](const auto& coords) { if constexpr (dim == 1) { const auto& x = coords(0); return (x >= -0.8 && x <= -0.3) ? 1. : 0.; } - else + else if ( dim == 2 ) { const auto& x = coords(0); const auto& y = coords(1); return (x >= -0.8 && x <= -0.3 && y >= 0.3 && y <= 0.8) ? 1. : 0.; } + }); + samurai::times::timers.stop("init"); - auto unp1 = samurai::make_field<1>("unp1", mesh); + auto unp1 = samurai::make_field("unp1", mesh); // Intermediary fields for the RK3 scheme - auto u1 = samurai::make_field<1>("u1", mesh); - auto u2 = samurai::make_field<1>("u2", mesh); + auto u1 = samurai::make_field("u1", mesh); + auto u2 = samurai::make_field("u2", mesh); + + samurai::make_bc>(u, 0.); + samurai::make_bc>(unp1, 0.); + samurai::make_bc>(u1, 0.); + samurai::make_bc>(u2, 0.); unp1.fill(0); u1.fill(0); @@ -129,12 +156,19 @@ int main(int argc, char* argv[]) // Convection operator samurai::VelocityVector velocity; velocity.fill(1); - if constexpr (dim == 2) - { - velocity(1) = -1; - } + velocity(1) = -1 ; + + // origin weno5 auto conv = samurai::make_convection_weno5(velocity); + // SFC_LoadBalancer_interval balancer; + // Load_balancing::Life balancer; +// Load_balancing::GlobalCriteria balancer; + // Void_LoadBalancer balancer; + // Diffusion_LoadBalancer_cell balancer; + // Diffusion_LoadBalancer_interval balancer; + // Load_balancing::Diffusion balancer; + //--------------------// // Time iteration // //--------------------// @@ -148,7 +182,10 @@ int main(int argc, char* argv[]) } auto MRadaptation = samurai::make_MRAdapt(u); + + samurai::times::timers.start("MRadaptation"); MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("MRadaptation"); double dt_save = nfiles == 0 ? dt : Tf / static_cast(nfiles); std::size_t nsave = 0, nt = 0; @@ -158,9 +195,20 @@ int main(int argc, char* argv[]) save(path, filename, u, suffix); } + samurai::times::timers.start("tloop"); double t = 0; while (t != Tf) { + +/** + if (nt % nt_loadbalance == 0 && nt > 1 ) + { + samurai::times::timers.start("tloop.lb:"+balancer.getName()); + balancer.load_balance(mesh, u); + samurai::times::timers.stop("tloop.lb:"+balancer.getName()); + + } + **/ // Move to next timestep t += dt; if (t > Tf) @@ -168,30 +216,44 @@ int main(int argc, char* argv[]) dt += Tf - t; t = Tf; } - std::cout << fmt::format("iteration {}: t = {:.2f}, dt = {}", nt++, t, dt) << std::flush; + + std::cout << fmt::format("iteration {}: t = {:.2f}, dt = {}", nt++, t, dt) << std::flush << std::endl; // Mesh adaptation + samurai::times::timers.start("tloop.MRadaptation"); MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("tloop.MRadaptation"); + + samurai::times::timers.start("tloop.ugm"); samurai::update_ghost_mr(u); + samurai::times::timers.stop("tloop.ugm"); + + samurai::times::timers.start("tloop.resize_fill"); unp1.resize(); + unp1.fill(0); + u1.resize(); u2.resize(); u1.fill(0); u2.fill(0); + samurai::times::timers.stop("tloop.resize_fill"); // unp1 = u - dt * conv(u); // TVD-RK3 (SSPRK3) + samurai::times::timers.start("tloop.RK3"); u1 = u - dt * conv(u); samurai::update_ghost_mr(u1); u2 = 3. / 4 * u + 1. / 4 * (u1 - dt * conv(u1)); samurai::update_ghost_mr(u2); unp1 = 1. / 3 * u + 2. / 3 * (u2 - dt * conv(u2)); + samurai::times::timers.stop("tloop.RK3"); // u <-- unp1 std::swap(u.array(), unp1.array()); // Save the result + samurai::times::timers.start("tloop.io"); if (nfiles == 0 || t >= static_cast(nsave + 1) * dt_save || t == Tf) { if (nfiles != 1) @@ -204,9 +266,10 @@ int main(int argc, char* argv[]) save(path, filename, u); } } + samurai::times::timers.stop("tloop.io"); - std::cout << std::endl; } + samurai::times::timers.stop("tloop"); if constexpr (dim == 1) { diff --git a/demos/FiniteVolume/linear_convection_ponio.cpp b/demos/FiniteVolume/linear_convection_ponio.cpp new file mode 100644 index 000000000..2e4594c27 --- /dev/null +++ b/demos/FiniteVolume/linear_convection_ponio.cpp @@ -0,0 +1,278 @@ +// Copyright 2018-2024 the samurai's authors +// SPDX-License-Identifier: BSD-3-Clause +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ponio/runge_kutta.hpp" +#include "ponio/solver.hpp" + +#ifdef WITH_STATS +#include "samurai/statistics.hpp" +#endif + +#include +namespace fs = std::filesystem; + +template +double exact_solution(xt::xtensor_fixed> coords, double t) +{ + const double a = 1; + const double b = 0; + const double& x = coords(0); + return (a * x + b) / (a * t + 1); +} + +template +void save(const fs::path& path, const std::string& filename, Field& u, const std::string& suffix = "") +{ + auto mesh = u.mesh(); + u.name() = "u"; + auto level_ = samurai::make_field("level", mesh); + + if (!fs::exists(path)) + { + fs::create_directory(path); + } + + samurai::for_each_cell(mesh, + [&](const auto& cell) + { + level_[cell] = cell.level; + }); + + samurai::save(path, fmt::format("{}{}", filename, suffix), mesh, u, level_); +} + +int main(int argc, char* argv[]) +{ + samurai::initialize(argc, argv); + + static constexpr std::size_t dim = 2; + using Config = samurai::MRConfig; + using Box = samurai::Box; + using point_t = typename Box::point_t; + + std::cout << "------------------------- Linear convection -------------------------" << std::endl; + + //--------------------// + // Program parameters // + //--------------------// + boost::mpi::communicator world; + + // Simulation parameters + double left_box = 0; + double right_box = 4; + std::string init_sol = "hat"; + + // Time integration + double Tf = 3; + double dt = 0; + double cfl = 0.95; + + // Multiresolution parameters + std::size_t min_level = 0; + std::size_t max_level = dim == 1 ? 5 : 3; + double mr_epsilon = 1e-4; // Threshold used by multiresolution + double mr_regularity = 1.; // Regularity guess for multiresolution + + // Output parameters + fs::path path = fs::current_path(); + std::string filename = "linear_convection_" + std::to_string(dim) + "D"; + std::size_t nfiles = 0; + std::size_t nt_loadbalance = 10; + + CLI::App app{"Finite volume example for the heat equation in 1d"}; + app.add_option("--left", left_box, "The left border of the box")->capture_default_str()->group("Simulation parameters"); + app.add_option("--right", right_box, "The right border of the box")->capture_default_str()->group("Simulation parameters"); + app.add_option("--init-sol", init_sol, "Initial solution: hat/linear/bands")->capture_default_str()->group("Simulation parameters"); + app.add_option("--Tf", Tf, "Final time")->capture_default_str()->group("Simulation parameters"); + app.add_option("--dt", dt, "Time step")->capture_default_str()->group("Simulation parameters"); + app.add_option("--cfl", cfl, "The CFL")->capture_default_str()->group("Simulation parameters"); + app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") + ->capture_default_str() + ->group("Multiresolution"); + app.add_option("--mr-reg", + mr_regularity, + "The regularity criteria used by the multiresolution to " + "adapt the mesh") + ->capture_default_str() + ->group("Multiresolution"); + app.add_option("--path", path, "Output path")->capture_default_str()->group("Ouput"); + app.add_option("--filename", filename, "File name prefix")->capture_default_str()->group("Ouput"); + app.add_option("--nfiles", nfiles, "Number of output files")->capture_default_str()->group("Ouput"); + app.allow_extras(); + CLI11_PARSE(app, argc, argv); + + //--------------------// + // Problem definition // + //--------------------// + + point_t box_corner1, box_corner2; + box_corner1.fill(left_box); + box_corner2.fill(right_box); + Box box(box_corner1, box_corner2); + std::array periodic; + periodic.fill(false); + + samurai::times::timers.start("init"); + samurai::MRMesh mesh{box, min_level, max_level, periodic}; + + // Initial solution + auto u = samurai::make_field("u", + mesh, + [&](const auto& coords) + { + if constexpr (dim == 1) + { + auto& x = coords(0); + return (x >= -0.8 && x <= -0.3) ? 1. : 0.; + } + else if ( dim == 2 ) + { + auto& x = coords(0); + auto& y = coords(1); + return (x <= 0.8 && x >= 0.3 && y >= 0.3 && y <= 0.8) ? 1. : 0.; + }else { + auto & x = coords( 0 ); + auto & y = coords( 1 ); + auto & z = coords( 2 ); + return (x <= 0.8 && x >= 0.3 && y >= 0.3 && y <= 0.8 && z >= 0.3 && z <= 0.8 ) ? 1. : 0.; + } + + }); + samurai::times::timers.stop("init"); + + samurai::make_bc>(u, 0.); + + // Convection operator + samurai::VelocityVector velocity; + velocity.fill(1); + + // origin weno5 + auto conv = samurai::make_convection_weno5(velocity); + + auto ponio_f = [&]([[maybe_unused]]double t, auto && u){ + samurai::make_bc>(u, 0.); + samurai::update_ghost_mr( u ); + return - conv( u ); + }; + + // SFC_LoadBalancer_interval balancer; + // Load_balancing::Life balancer; + // Void_LoadBalancer balancer; + Diffusion_LoadBalancer_cell balancer; + // Diffusion_LoadBalancer_interval balancer; + // Load_balancing::Diffusion balancer; + + //--------------------// + // Time iteration // + //--------------------// + + if (dt == 0) + { + double dx = samurai::cell_length(max_level); + auto a = xt::abs(velocity); + double sum_velocities = xt::sum(xt::abs(velocity))(); + dt = cfl * dx / sum_velocities; + } + + auto sol_range = ponio::make_solver_range( ponio_f, ponio::runge_kutta::rk_33(), u, {0.0, Tf}, dt ); + + auto it = sol_range.begin(); + + auto MRadaptation = samurai::make_MRAdapt( it->state ); + + samurai::times::timers.start("MRadaptation"); + MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("MRadaptation"); + + double dt_save = nfiles == 0 ? dt : Tf / static_cast(nfiles); + std::size_t nsave = 0, nt = 0; + if (nfiles != 1) + { + std::string suffix = (nfiles != 1) ? fmt::format("_ite_{}", nsave++) : ""; + save(path, filename, it->state, suffix); + } + + samurai::times::timers.start("tloop"); + + while ( it->time != Tf) + { + + if ( balancer.require_balance( mesh ) ) + { + samurai::times::timers.start("tloop.lb:"+balancer.getName()); + balancer.load_balance(mesh, it->state); + samurai::times::timers.stop("tloop.lb:"+balancer.getName()); + } + std::cout << fmt::format("iteration {}: t = {:.2f}, dt = {}", nt++, it->time, it->time_step) << std::endl; + + // Mesh adaptation + samurai::times::timers.start("tloop.MRadaptation"); + MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("tloop.MRadaptation"); + + samurai::times::timers.start("tloop.ugm"); + samurai::update_ghost_mr( it->state ); + samurai::times::timers.stop("tloop.ugm"); + + samurai::times::timers.start("tloop.resize_fill"); + for ( auto& ki : it.meth.kis ) + { + ki.resize(); + ki.fill( 0. ); + } + samurai::times::timers.stop("tloop.resize_fill"); + + samurai::times::timers.start("tloop.scheme"); + ++it; + samurai::times::timers.stop("tloop.scheme"); + + // Save the result + samurai::times::timers.start("tloop.io"); + if (nfiles == 0 || it->time >= static_cast(nsave + 1) * dt_save || it->time == Tf) + { + if (nfiles != 1) + { + std::string suffix = (nfiles != 1) ? fmt::format("_ite_{}", nsave++) : ""; + save(path, filename, it->state, suffix); + } + else + { + save(path, filename, it->state); + } + } + samurai::times::timers.stop("tloop.io"); + + } + samurai::times::timers.stop("tloop"); + + if constexpr (dim == 1) + { + std::cout << std::endl; + std::cout << "Run the following command to view the results:" << std::endl; + std::cout << "python <>/python/read_mesh.py " << filename << "_ite_ --field u level --start 0 --end " << nsave + << std::endl; + } + + samurai::finalize(); + return 0; +} diff --git a/demos/LBM/CMakeLists.txt b/demos/LBM/CMakeLists.txt index 71d959b9d..2eb421061 100644 --- a/demos/LBM/CMakeLists.txt +++ b/demos/LBM/CMakeLists.txt @@ -30,7 +30,7 @@ target_link_libraries(lbm-D1Q222-euler-sod samurai) # 2D Paper add_executable(lbm-D2Q4444-euler-lax-liu D2Q4444_Euler_Lax_Liu.cpp) -target_link_libraries(lbm-D2Q4444-euler-lax-liu samurai) +target_link_libraries(lbm-D2Q4444-euler-lax-liu samurai CLI11::CLI11 ${MPI_LIBRARIES}) add_executable(lbm-D2Q4444-euler-lax-liu-uniform D2Q4444_Euler_Lax_Liu_uniform.cpp) target_link_libraries(lbm-D2Q4444-euler-lax-liu-uniform samurai) diff --git a/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp b/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp index 5cb94fcef..8ec94cd52 100644 --- a/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp +++ b/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp @@ -4,44 +4,32 @@ #include #include -#include +#include +#include #include #include #include -#include +#include +#include #include -#include -#include "boundary_conditions.hpp" -#include "prediction_map_2d.hpp" - -#include "utils_lbm_mr_2d.hpp" - -/// Timer used in tic & toc -auto tic_timer = std::chrono::high_resolution_clock::now(); - -/// Launching the timer -void tic() -{ - tic_timer = std::chrono::high_resolution_clock::now(); -} - -/// Stopping the timer and returning the duration in seconds -double toc() -{ - const auto toc_timer = std::chrono::high_resolution_clock::now(); - const std::chrono::duration time_span = toc_timer - tic_timer; - return time_span.count(); -} +#include +#include +#include +#include +#include +#include +#include +#include double gm = 1.4; // Gas constant template -auto init_f(samurai::MROMesh& mesh, int config, double lambda) +auto init_f(samurai::MRMesh& mesh, int config, double lambda) { constexpr std::size_t nvel = 16; - using mesh_id_t = typename samurai::MROMesh::mesh_id_t; + using mesh_id_t = typename samurai::MRMesh::mesh_id_t; auto f = samurai::make_field("f", mesh); f.fill(0); @@ -192,79 +180,8 @@ auto init_f(samurai::MROMesh& mesh, int config, double lambda) return f; } -template -auto compute_prediction(std::size_t min_level, std::size_t max_level) -{ - coord_index_t i = 0, j = 0; - std::vector>> data(max_level - min_level + 1); - - auto rotation_of_pi_over_two = [](int alpha, int k, int h) - { - // Returns the rotation of (k, h) of an angle alpha * pi / 2. - // All the operations are performed on integer, to be exact - int cosinus = static_cast(std::round(std::cos(alpha * M_PI / 2.))); - int sinus = static_cast(std::round(std::sin(alpha * M_PI / 2.))); - - return std::pair(cosinus * k - sinus * h, sinus * k + cosinus * h); - }; - - // Transforms the coordinates to apply the rotation - auto tau = [](int delta, int k) - { - // The case in which delta = 0 is rather exceptional - if (delta == 0) - { - return k; - } - else - { - auto tmp = (1 << (delta - 1)); - return static_cast((k < tmp) ? (k - tmp) : (k - tmp + 1)); - } - }; - - auto tau_inverse = [](int delta, int k) - { - if (delta == 0) - { - return k; - } - else - { - auto tmp = (1 << (delta - 1)); - return static_cast((k < 0) ? (k + tmp) : (k + tmp - 1)); - } - }; - - for (std::size_t k = 0; k < max_level - min_level + 1; ++k) - { - int size = (1 << k); - data[k].resize(4); - - for (int alpha = 0; alpha < 4; ++alpha) - { - for (int l = 0; l < size; ++l) - { - // The reference direction from which the other ones are - // computed is that of (1, 0) - auto rotated_in = rotation_of_pi_over_two(alpha, tau(k, i * size - 1), tau(k, j * size + l)); - auto rotated_out = rotation_of_pi_over_two(alpha, tau(k, (i + 1) * size - 1), tau(k, j * size + l)); - - // For the cells inside the domain, we can already combine - // entering and exiting fluxes and we have a compensation of - // many cells. - data[k][alpha] += (prediction(k, tau_inverse(k, rotated_in.first), tau_inverse(k, rotated_in.second)) - - prediction(k, tau_inverse(k, rotated_out.first), tau_inverse(k, rotated_out.second))); - } - } - } - return data; -} - -template +template void one_time_step(Field& f, - Func&& update_bc_for_level, - const pred& pred_coeff, const double lambda, const double sq_rho, const double sxy_rho, @@ -273,193 +190,155 @@ void one_time_step(Field& f, const double sq_e, const double sxy_e) { - constexpr std::size_t nvel = Field::size; - using coord_index_t = typename Field::interval_t::coord_index_t; + constexpr std::size_t dim = Field::dim; + auto& mesh = f.mesh(); + using mesh_id_t = typename Field::mesh_t::mesh_id_t; - auto mesh = f.mesh(); - using mesh_id_t = typename decltype(mesh)::mesh_id_t; - - auto min_level = mesh.min_level(); - auto max_level = mesh.max_level(); + std::size_t max_level = mesh.max_level(); - samurai::update_ghost_mr(f, std::forward(update_bc_for_level)); - samurai::update_overleaves_mr(f, std::forward(update_bc_for_level)); + samurai::times::timers.start("ugm-step"); + samurai::update_ghost_mr(f); + samurai::times::timers.stop("ugm-step"); + samurai::times::timers.start("field-step"); Field new_f{"new_f", mesh}; new_f.array().fill(0.); - Field fluxes{"fluxes", mesh}; // This stored the fluxes computed at the level of the overleaves - fluxes.array().fill(0.); Field advected{"advected", mesh}; advected.array().fill(0.); + samurai::times::timers.stop("field-step"); - for (std::size_t level = min_level; level <= max_level; ++level) - { - auto leaves = samurai::intersection(mesh[mesh_id_t::cells][level], mesh[mesh_id_t::cells][level]); - - if (level == max_level) - { // Advection at the finest level - - leaves( - [&](auto& interval, auto& index) - { - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y - - // We enforce a bounce-back - for (int scheme_n = 0; scheme_n < 4; ++scheme_n) - { // We have 4 schemes - advected(0 + 4 * scheme_n, level, k, h) = f(0 + 4 * scheme_n, level, k - 1, h); - advected(1 + 4 * scheme_n, level, k, h) = f(1 + 4 * scheme_n, level, k, h - 1); - advected(2 + 4 * scheme_n, level, k, h) = f(2 + 4 * scheme_n, level, k + 1, h); - advected(3 + 4 * scheme_n, level, k, h) = f(3 + 4 * scheme_n, level, k, h + 1); - } - }); - } - else // Advection at the coarse levels using the overleaves + samurai::times::timers.start("lbm-step"); + samurai::for_each_interval( + mesh[mesh_id_t::cells], + [&](std::size_t level, auto& i, auto& index) { - auto lev_p_1 = level + 1; - std::size_t j = max_level - (lev_p_1); - double coeff = 1. / (1 << (2 * j)); // ATTENTION A LA DIMENSION 2 !!!! - - leaves.on(level + 1)([&](auto& interval, auto& index) { // This are overleaves - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y - - for (int scheme_n = 0; scheme_n < 4; ++scheme_n) - { - auto shift = 4 * scheme_n; - - for (std::size_t alpha = 0; alpha < 4; ++alpha) - { - for (auto& c : pred_coeff[j][alpha].coeff) - { - coord_index_t stencil_x, stencil_y; - std::tie(stencil_x, stencil_y) = c.first; - - fluxes(alpha + shift, lev_p_1, k, h) += c.second * f(alpha + shift, lev_p_1, k + stencil_x, h + stencil_y); - } - } - } - }); + auto j = index[0]; + auto jump = max_level - level; + double coef = 1. / (1 << (dim * jump)); + for (std::size_t scheme_n = 0; scheme_n < 4; ++scheme_n) + { // We have 4 schemes + advected(0 + 4 * scheme_n, level, i, j) = f(0 + 4 * scheme_n, level, i-1, j); + advected(1 + 4 * scheme_n, level, i, j) = f(1 + 4 * scheme_n, level, i, j-1); + advected(2 + 4 * scheme_n, level, i, j) = f(2 + 4 * scheme_n, level, i+1, j); + advected(3 + 4 * scheme_n, level, i, j) = f(3 + 4 * scheme_n, level, i, j+1); + + // advected( + // 0 + 4 * scheme_n, + // level, + // i, + // j) = f(0 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 0 + 4 * scheme_n, level, i - 1, j, jump, {(1 << jump) - 1, (1 << jump)}, {0, (1 << jump)}) + // - coef * samurai::portion(f, 0 + 4 * scheme_n, level, i, j, jump, {(1 << jump) - 1, (1 << jump)}, {0, (1 << jump)}); + + // advected( + // 1 + 4 * scheme_n, + // level, + // i, + // j) = f(1 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 1 + 4 * scheme_n, level, i, j - 1, jump, {0, (1 << jump)}, {(1 << jump) - 1, (1 << jump)}) + // - coef * samurai::portion(f, 1 + 4 * scheme_n, level, i, j, jump, {0, (1 << jump)}, {(1 << jump) - 1, (1 << jump)}); + + // advected(2 + 4 * scheme_n, level, i, j) = f( + // 2 + 4 * scheme_n, + // level, + // i, + // j) = f(2 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 2 + 4 * scheme_n, level, i + 1, j, jump, {0, 1}, {0, (1 << jump)}) + // - coef * samurai::portion(f, 2 + 4 * scheme_n, level, i, j, jump, {0, 1}, {0, (1 << jump)}); + // advected(3 + 4 * scheme_n, + // level, + // i, + // j) = f(3 + 4 * scheme_n, level, i, j) + // + coef * samurai::portion(f, 3 + 4 * scheme_n, level, i, j + 1, jump, {0, (1 << jump)}, {0, 1}) + // - coef * samurai::portion(f, 3 + 4 * scheme_n, level, i, j, jump, {0, (1 << jump)}, {0, 1}); + } - leaves( - [&](auto& interval, auto& index) - { - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y - - for (int alpha = 0; alpha < 16; ++alpha) - { - advected(alpha, level, k, h) = f(alpha, level, k, h) - + coeff * 0.25 - * (fluxes(alpha, lev_p_1, 2 * k, 2 * h) + fluxes(alpha, lev_p_1, 2 * k + 1, 2 * h) - + fluxes(alpha, lev_p_1, 2 * k, 2 * h + 1) - + fluxes(alpha, lev_p_1, 2 * k + 1, 2 * h + 1)); - } - }); - } + // We compute the advected momenti + auto m0_0 = xt::eval(advected(0, level, i, j) + advected(1, level, i, j) + advected(2, level, i, j) + advected(3, level, i, j)); + auto m0_1 = xt::eval(lambda * (advected(0, level, i, j) - advected(2, level, i, j))); + auto m0_2 = xt::eval(lambda * (advected(1, level, i, j) - advected(3, level, i, j))); + auto m0_3 = xt::eval( + lambda * lambda * (advected(0, level, i, j) - advected(1, level, i, j) + advected(2, level, i, j) - advected(3, level, i, j))); + + auto m1_0 = xt::eval(advected(4, level, i, j) + advected(5, level, i, j) + advected(6, level, i, j) + advected(7, level, i, j)); + auto m1_1 = xt::eval(lambda * (advected(4, level, i, j) - advected(6, level, i, j))); + auto m1_2 = xt::eval(lambda * (advected(5, level, i, j) - advected(7, level, i, j))); + auto m1_3 = xt::eval( + lambda * lambda * (advected(4, level, i, j) - advected(5, level, i, j) + advected(6, level, i, j) - advected(7, level, i, j))); + + auto m2_0 = xt::eval(advected(8, level, i, j) + advected(9, level, i, j) + advected(10, level, i, j) + advected(11, level, i, j)); + auto m2_1 = xt::eval(lambda * (advected(8, level, i, j) - advected(10, level, i, j))); + auto m2_2 = xt::eval(lambda * (advected(9, level, i, j) - advected(11, level, i, j))); + auto m2_3 = xt::eval( + lambda * lambda + * (advected(8, level, i, j) - advected(9, level, i, j) + advected(10, level, i, j) - advected(11, level, i, j))); + + auto m3_0 = xt::eval(advected(12, level, i, j) + advected(13, level, i, j) + advected(14, level, i, j) + + advected(15, level, i, j)); + auto m3_1 = xt::eval(lambda * (advected(12, level, i, j) - advected(14, level, i, j))); + auto m3_2 = xt::eval(lambda * (advected(13, level, i, j) - advected(15, level, i, j))); + auto m3_3 = xt::eval( + lambda * lambda + * (advected(12, level, i, j) - advected(13, level, i, j) + advected(14, level, i, j) - advected(15, level, i, j))); + + m0_1 = (1 - sq_rho) * m0_1 + sq_rho * (m1_0); + m0_2 = (1 - sq_rho) * m0_2 + sq_rho * (m2_0); + m0_3 = (1 - sxy_rho) * m0_3; + + m1_1 = (1 - sq_q) * m1_1 + + sq_q * ((3. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (1. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (gm - 1.) * m3_0); + m1_2 = (1 - sq_q) * m1_2 + sq_q * (m1_0 * m2_0 / m0_0); + m1_3 = (1 - sxy_q) * m1_3; + + m2_1 = (1 - sq_q) * m2_1 + sq_q * (m1_0 * m2_0 / m0_0); + m2_2 = (1 - sq_q) * m2_2 + + sq_q * ((3. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (1. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (gm - 1.) * m3_0); + m2_3 = (1 - sxy_q) * m2_3; + + m3_1 = (1 - sq_e) * m3_1 + + sq_e + * (gm * (m1_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m1_0 * m1_0 * m1_0) / (m0_0 * m0_0) + - (gm / 2. - 1. / 2.) * (m1_0 * m2_0 * m2_0) / (m0_0 * m0_0)); + m3_2 = (1 - sq_e) * m3_2 + + sq_e + * (gm * (m2_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m2_0 * m2_0 * m2_0) / (m0_0 * m0_0) + - (gm / 2. - 1. / 2.) * (m2_0 * m1_0 * m1_0) / (m0_0 * m0_0)); + m3_3 = (1 - sxy_e) * m3_3; + + new_f(0, level, i, j) = .25 * m0_0 + .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; + new_f(1, level, i, j) = .25 * m0_0 + .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; + new_f(2, level, i, j) = .25 * m0_0 - .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; + new_f(3, level, i, j) = .25 * m0_0 - .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; + + new_f(4, level, i, j) = .25 * m1_0 + .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; + new_f(5, level, i, j) = .25 * m1_0 + .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; + new_f(6, level, i, j) = .25 * m1_0 - .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; + new_f(7, level, i, j) = .25 * m1_0 - .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; + + new_f(8, level, i, j) = .25 * m2_0 + .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; + new_f(9, level, i, j) = .25 * m2_0 + .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; + new_f(10, level, i, j) = .25 * m2_0 - .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; + new_f(11, level, i, j) = .25 * m2_0 - .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; + + new_f(12, level, i, j) = .25 * m3_0 + .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; + new_f(13, level, i, j) = .25 * m3_0 + .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; + new_f(14, level, i, j) = .25 * m3_0 - .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; + new_f(15, level, i, j) = .25 * m3_0 - .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; + }); + + samurai::times::timers.stop("lbm-step"); - leaves( - [&](auto& interval, auto& index) - { - auto k = interval; // Logical index in x - auto h = index[0]; // Logical index in y - - // We compute the advected momenti - auto m0_0 = xt::eval(advected(0, level, k, h) + advected(1, level, k, h) + advected(2, level, k, h) - + advected(3, level, k, h)); - auto m0_1 = xt::eval(lambda * (advected(0, level, k, h) - advected(2, level, k, h))); - auto m0_2 = xt::eval(lambda * (advected(1, level, k, h) - advected(3, level, k, h))); - auto m0_3 = xt::eval( - lambda * lambda - * (advected(0, level, k, h) - advected(1, level, k, h) + advected(2, level, k, h) - advected(3, level, k, h))); - - auto m1_0 = xt::eval(advected(4, level, k, h) + advected(5, level, k, h) + advected(6, level, k, h) - + advected(7, level, k, h)); - auto m1_1 = xt::eval(lambda * (advected(4, level, k, h) - advected(6, level, k, h))); - auto m1_2 = xt::eval(lambda * (advected(5, level, k, h) - advected(7, level, k, h))); - auto m1_3 = xt::eval( - lambda * lambda - * (advected(4, level, k, h) - advected(5, level, k, h) + advected(6, level, k, h) - advected(7, level, k, h))); - - auto m2_0 = xt::eval(advected(8, level, k, h) + advected(9, level, k, h) + advected(10, level, k, h) - + advected(11, level, k, h)); - auto m2_1 = xt::eval(lambda * (advected(8, level, k, h) - advected(10, level, k, h))); - auto m2_2 = xt::eval(lambda * (advected(9, level, k, h) - advected(11, level, k, h))); - auto m2_3 = xt::eval( - lambda * lambda - * (advected(8, level, k, h) - advected(9, level, k, h) + advected(10, level, k, h) - advected(11, level, k, h))); - - auto m3_0 = xt::eval(advected(12, level, k, h) + advected(13, level, k, h) + advected(14, level, k, h) - + advected(15, level, k, h)); - auto m3_1 = xt::eval(lambda * (advected(12, level, k, h) - advected(14, level, k, h))); - auto m3_2 = xt::eval(lambda * (advected(13, level, k, h) - advected(15, level, k, h))); - auto m3_3 = xt::eval( - lambda * lambda - * (advected(12, level, k, h) - advected(13, level, k, h) + advected(14, level, k, h) - advected(15, level, k, h))); - - m0_1 = (1 - sq_rho) * m0_1 + sq_rho * (m1_0); - m0_2 = (1 - sq_rho) * m0_2 + sq_rho * (m2_0); - m0_3 = (1 - sxy_rho) * m0_3; - - m1_1 = (1 - sq_q) * m1_1 - + sq_q * ((3. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (1. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (gm - 1.) * m3_0); - m1_2 = (1 - sq_q) * m1_2 + sq_q * (m1_0 * m2_0 / m0_0); - m1_3 = (1 - sxy_q) * m1_3; - - m2_1 = (1 - sq_q) * m2_1 + sq_q * (m1_0 * m2_0 / m0_0); - m2_2 = (1 - sq_q) * m2_2 - + sq_q * ((3. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (1. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (gm - 1.) * m3_0); - m2_3 = (1 - sxy_q) * m2_3; - - m3_1 = (1 - sq_e) * m3_1 - + sq_e - * (gm * (m1_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m1_0 * m1_0 * m1_0) / (m0_0 * m0_0) - - (gm / 2. - 1. / 2.) * (m1_0 * m2_0 * m2_0) / (m0_0 * m0_0)); - m3_2 = (1 - sq_e) * m3_2 - + sq_e - * (gm * (m2_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m2_0 * m2_0 * m2_0) / (m0_0 * m0_0) - - (gm / 2. - 1. / 2.) * (m2_0 * m1_0 * m1_0) / (m0_0 * m0_0)); - m3_3 = (1 - sxy_e) * m3_3; - - new_f(0, level, k, h) = .25 * m0_0 + .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; - new_f(1, level, k, h) = .25 * m0_0 + .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; - new_f(2, level, k, h) = .25 * m0_0 - .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3; - new_f(3, level, k, h) = .25 * m0_0 - .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3; - - new_f(4, level, k, h) = .25 * m1_0 + .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; - new_f(5, level, k, h) = .25 * m1_0 + .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; - new_f(6, level, k, h) = .25 * m1_0 - .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3; - new_f(7, level, k, h) = .25 * m1_0 - .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3; - - new_f(8, level, k, h) = .25 * m2_0 + .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; - new_f(9, level, k, h) = .25 * m2_0 + .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; - new_f(10, level, k, h) = .25 * m2_0 - .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3; - new_f(11, level, k, h) = .25 * m2_0 - .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3; - - new_f(12, level, k, h) = .25 * m3_0 + .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; - new_f(13, level, k, h) = .25 * m3_0 + .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; - new_f(14, level, k, h) = .25 * m3_0 - .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3; - new_f(15, level, k, h) = .25 * m3_0 - .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3; - }); - } std::swap(f.array(), new_f.array()); } template -void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "") +void save_solution(Field& f, double eps, std::size_t ite, std::size_t freq_out, std::string ext = "") { using value_t = typename Field::value_type; - auto mesh = f.mesh(); - using mesh_id_t = typename decltype(mesh)::mesh_id_t; - - std::size_t min_level = mesh.min_level(); - std::size_t max_level = mesh.max_level(); - - std::stringstream str; - str << "LBM_D2Q4_3_Euler_" << ext << "_lmin_" << min_level << "_lmax-" << max_level << "_eps-" << eps << "_ite-" << ite; + if( ite % freq_out != 0 ) return; + auto& mesh = f.mesh(); auto level = samurai::make_field("level", mesh); auto rho = samurai::make_field("rho", mesh); auto qx = samurai::make_field("qx", mesh); @@ -467,7 +346,7 @@ void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "") auto e = samurai::make_field("e", mesh); auto s = samurai::make_field("entropy", mesh); - samurai::for_each_cell(mesh[mesh_id_t::cells], + samurai::for_each_cell(mesh, [&](auto& cell) { level[cell] = cell.level; @@ -482,449 +361,119 @@ void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "") s[cell] = std::log(p / std::pow(rho[cell], gm)); }); - samurai::save(str.str().data(), mesh, rho, qx, qy, e, s, f, level); + std::size_t min_level = mesh.min_level(); + std::size_t max_level = mesh.max_level(); + std::string filename = fmt::format("LBM_D2Q4_3_Euler_{}_lmin-{}_lmax-{}_eps-{}_ite-{}", ext, min_level, max_level, eps, ite); + samurai::save(filename, mesh, rho, qx, qy, e, s, f, level); } -// Attention : the number 2 as second template parameter does not mean -// that we are dealing with two fields!!!! -template -xt::xtensor -prediction_all(const Field& f, - std::size_t level_g, - std::size_t level, - const interval_t& k, - const ordinates_t& h, - std::map, xt::xtensor>& mem_map) +int main(int argc, char* argv[]) { - // That is used to employ _ with xtensor - using namespace xt::placeholders; - - // mem_map.clear(); // To be activated if we want to avoid memoization - auto it = mem_map.find({level_g, level, k, h}); - - if (it != mem_map.end() && k.size() == (std::get<2>(it->first)).size()) - { - return it->second; - } - else - { - auto mesh = f.mesh(); - using mesh_id_t = typename decltype(mesh)::mesh_id_t; - - // We put only the size in x (k.size()) because in y we only have slices - // of size 1. The second term (1) should be adapted according to the - // number of fields that we have. - std::vector shape_x = {k.size(), 16}; - xt::xtensor out = xt::empty(shape_x); - auto mask = mesh.exists(mesh_id_t::cells_and_ghosts, - level_g + level, - k, - h); // Check if we are on a leaf or a ghost (CHECK IF IT IS OK) - xt::xtensor mask_all = xt::empty(shape_x); - - for (int h_field = 0; h_field < 16; ++h_field) - { - xt::view(mask_all, xt::all(), h_field) = mask; - } - - if (xt::all(mask)) // Recursion finished - { - return xt::eval(f(0, 16, level_g + level, k, h)); - } + samurai::initialize(argc, argv); - // If we cannot stop here - auto kg = k >> 1; - kg.step = 1; - xt::xtensor val = xt::empty(shape_x); - /* - -------------------- - NW | N | NE - -------------------- - W | EARTH | E - -------------------- - SW | S | SE - -------------------- - */ - - auto earth = xt::eval(prediction_all(f, level_g, level - 1, kg, (h >> 1), mem_map)); - auto W = xt::eval(prediction_all(f, level_g, level - 1, kg - 1, (h >> 1), mem_map)); - auto E = xt::eval(prediction_all(f, level_g, level - 1, kg + 1, (h >> 1), mem_map)); - auto S = xt::eval(prediction_all(f, level_g, level - 1, kg, (h >> 1) - 1, mem_map)); - auto N = xt::eval(prediction_all(f, level_g, level - 1, kg, (h >> 1) + 1, mem_map)); - auto SW = xt::eval(prediction_all(f, level_g, level - 1, kg - 1, (h >> 1) - 1, mem_map)); - auto SE = xt::eval(prediction_all(f, level_g, level - 1, kg + 1, (h >> 1) - 1, mem_map)); - auto NW = xt::eval(prediction_all(f, level_g, level - 1, kg - 1, (h >> 1) + 1, mem_map)); - auto NE = xt::eval(prediction_all(f, level_g, level - 1, kg + 1, (h >> 1) + 1, mem_map)); - - // This is to deal with odd/even indices in the x direction - std::size_t start_even = (k.start & 1) ? 1 : 0; - std::size_t start_odd = (k.start & 1) ? 0 : 1; - std::size_t end_even = (k.end & 1) ? kg.size() : kg.size() - 1; - std::size_t end_odd = (k.end & 1) ? kg.size() - 1 : kg.size(); - - int delta_y = (h & 1) ? 1 : 0; - int m1_delta_y = (delta_y == 0) ? 1 : -1; // (-1)^(delta_y) - - xt::view(val, xt::range(start_even, _, 2)) = xt::view( - earth + 1. / 8 * (W - E) + 1. / 8 * m1_delta_y * (S - N) - 1. / 64 * m1_delta_y * (NE - NW - SE + SW), - xt::range(start_even, _)); - xt::view(val, xt::range(start_odd, _, 2)) = xt::view( - earth - 1. / 8 * (W - E) + 1. / 8 * m1_delta_y * (S - N) + 1. / 64 * m1_delta_y * (NE - NW - SE + SW), - xt::range(_, end_odd)); - - xt::masked_view(out, !mask_all) = xt::masked_view(val, !mask_all); - - for (int k_mask = 0, k_int = k.start; k_int < k.end; ++k_mask, ++k_int) - { - if (mask[k_mask]) - { - xt::view(out, k_mask) = xt::view(f(0, 16, level_g + level, {k_int, k_int + 1}, h), 0); - } - } + CLI::App app{"Multi resolution for a D2Q4 LBM scheme for the scalar advection equation"}; - // It is crucial to use insert and not [] in order not to update the - // value in case of duplicated (same key) - mem_map.insert(std::make_pair(std::tuple{level_g, level, k, h}, out)); - return out; - } -} + std::size_t freq_out = 1; // frequency for output in iteration + std::size_t total_nb_ite = 100; + int configuration = 12; -template -double compute_error(Field& f, FieldFull& f_full, Func&& update_bc_for_level) -{ - constexpr std::size_t size = Field::size; - using value_t = typename Field::value_type; + // Multiresolution parameters + std::size_t min_level = 2; + std::size_t max_level = 9; + double mr_epsilon = 1.e-3; // Threshold used by multiresolution + double mr_regularity = 2.; // Regularity guess for multiresolution - auto mesh = f.mesh(); - using mesh_id_t = typename decltype(mesh)::mesh_id_t; + std::size_t nt_loadbalance = 10; - auto min_level = mesh.min_level(); - auto max_level = mesh.max_level(); + app.add_option("--nb-ite", total_nb_ite, "number of iteration")->capture_default_str(); + app.add_option("--config", configuration, "Lax-Liu configuration")->capture_default_str(); + app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh")->capture_default_str() + ->group("Multiresolution"); + app.add_option("--mr-reg", mr_regularity, "The regularity criteria used by the multiresolution to adapt the mesh") + ->capture_default_str()->group("Multiresolution"); + CLI11_PARSE(app, argc, argv); - auto init_mesh = f_full.mesh(); + constexpr size_t dim = 2; + using Config = samurai::MRConfig; - samurai::update_ghost_mr(f, std::forward(update_bc_for_level)); + double lambda = 1. / 0.2; // This seems to work + double T = 0.25; // 0.3;//1.2; - auto f_reconstructed = samurai::make_field("f_reconstructed", init_mesh); - f_reconstructed.fill(0.); + double sq_rho = 1.9; + double sxy_rho = 1.; - // For memoization - using interval_t = typename Field::interval_t; // Type in X - using ordinates_t = typename interval_t::index_t; // Type in Y - std::map, xt::xtensor> memoization_map; - memoization_map.clear(); + double sq_q = 1.75; + double sxy_q = 1.; - double error = 0.; - double norm = 0.; - double dx = 1. / (1 << max_level); + double sq_e = 1.75; + double sxy_e = 1.; - for (std::size_t level = 0; level <= max_level; ++level) + if (configuration == 12) { - auto leaves_on_finest = samurai::intersection(mesh[mesh_id_t::cells][level], mesh[mesh_id_t::cells][level]); - - leaves_on_finest.on(max_level)( - [&](auto& interval, auto& index) - { - auto k = interval; - auto h = index[0]; - - f_reconstructed(max_level, k, h) = prediction_all(f, level, max_level - level, k, h, memoization_map); - - auto rho_reconstructed = f_reconstructed(0, max_level, k, h) + f_reconstructed(1, max_level, k, h) - + f_reconstructed(2, max_level, k, h) + f_reconstructed(3, max_level, k, h); - auto rho_full = f_full(0, max_level, k, h) + f_full(1, max_level, k, h) + f_full(2, max_level, k, h) - + f_full(3, max_level, k, h); - - error += xt::sum(xt::abs(rho_reconstructed - rho_full))[0]; - norm += xt::sum(xt::abs(rho_full))[0]; - }); + T = .25; } - return (error / norm); -} - -template -void save_reconstructed(Field& f, FieldFull& f_full, Func&& update_bc_for_level, double eps, std::size_t ite, std::string ext = "") -{ - constexpr std::size_t size = Field::size; - using value_t = typename Field::value_type; - - auto mesh = f.mesh(); - using mesh_id_t = typename decltype(mesh)::mesh_id_t; - - auto min_level = mesh.min_level(); - auto max_level = mesh.max_level(); - - auto init_mesh = f_full.mesh(); - - samurai::update_ghost_mr(f, std::forward(update_bc_for_level)); - - auto f_reconstructed = samurai::make_field("f_reconstructed", init_mesh); // To reconstruct all and - // see entropy - f_reconstructed.fill(0.); - - auto rho_reconstructed = samurai::make_field("rho_reconstructed", init_mesh); - auto qx_reconstructed = samurai::make_field("qx_reconstructed", init_mesh); - auto qy_reconstructed = samurai::make_field("qy_reconstructed", init_mesh); - auto E_reconstructed = samurai::make_field("E_reconstructed", init_mesh); - auto s_reconstructed = samurai::make_field("s_reconstructed", init_mesh); - auto level_ = samurai::make_field("level", init_mesh); - - auto rho = samurai::make_field("rho", init_mesh); - auto qx = samurai::make_field("qx", init_mesh); - auto qy = samurai::make_field("qy", init_mesh); - auto E = samurai::make_field("E", init_mesh); - auto s = samurai::make_field("s", init_mesh); - - // For memoization - using interval_t = typename Field::interval_t; // Type in X - using ordinates_t = typename interval_t::index_t; // Type in Y - std::map, xt::xtensor> memoization_map; - - memoization_map.clear(); - - for (std::size_t level = 0; level <= max_level; ++level) + else { - auto number_leaves = mesh.nb_cells(level, mesh_id_t::cells); - - auto leaves_on_finest = samurai::intersection(mesh[mesh_id_t::cells][level], mesh[mesh_id_t::cells][level]); - - leaves_on_finest.on(max_level)( - [&](auto& interval, auto& index) - { - auto k = interval; - auto h = index[0]; - - f_reconstructed(max_level, k, h) = prediction_all(f, level, max_level - level, k, h, memoization_map); - - level_(max_level, k, h) = level; - - rho_reconstructed(max_level, k, h) = f_reconstructed(0, max_level, k, h) + f_reconstructed(1, max_level, k, h) - + f_reconstructed(2, max_level, k, h) + f_reconstructed(3, max_level, k, h); - - qx_reconstructed(max_level, k, h) = f_reconstructed(4, max_level, k, h) + f_reconstructed(5, max_level, k, h) - + f_reconstructed(6, max_level, k, h) + f_reconstructed(7, max_level, k, h); + T = .3; + // T = 0.1; + } - qy_reconstructed(max_level, k, h) = f_reconstructed(8, max_level, k, h) + f_reconstructed(9, max_level, k, h) - + f_reconstructed(10, max_level, k, h) + f_reconstructed(11, max_level, k, h); + samurai::Box box({0, 0}, {1, 1}); + samurai::MRMesh mesh(box, min_level, max_level); - E_reconstructed(max_level, k, h) = f_reconstructed(12, max_level, k, h) + f_reconstructed(13, max_level, k, h) - + f_reconstructed(14, max_level, k, h) + f_reconstructed(15, max_level, k, h); + // Initialization + auto f = init_f(mesh, configuration, lambda); // Adaptive scheme + samurai::make_bc>(f, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.); - s_reconstructed(max_level, k, h) = xt::log( - ((gm - 1.) - * (E_reconstructed(max_level, k, h) - - .5 * (xt::pow(qx_reconstructed(max_level, k, h), 2.) + xt::pow(qy_reconstructed(max_level, k, h), 2.)) - / rho_reconstructed(max_level, k, h))) - / xt::pow(rho_reconstructed(max_level, k, h), gm)); + double dx = 1.0 / (1 << max_level); + double dt = dx / lambda; - rho(max_level, k, h) = f_full(0, max_level, k, h) + f_full(1, max_level, k, h) + f_full(2, max_level, k, h) - + f_full(3, max_level, k, h); + std::size_t N = static_cast(T / dt); - qx(max_level, k, h) = f_full(4, max_level, k, h) + f_full(5, max_level, k, h) + f_full(6, max_level, k, h) - + f_full(7, max_level, k, h); + // SFC_LoadBalancer_interval balancer; + // SFC_LoadBalancer_interval balancer; + // Load_balancing::Life balancer; + // Void_LoadBalancer balancer; + Diffusion_LoadBalancer_cell balancer; + // Diffusion_LoadBalancer_interval balancer; + // Load_balancing::Diffusion balancer; - qy(max_level, k, h) = f_full(8, max_level, k, h) + f_full(9, max_level, k, h) + f_full(10, max_level, k, h) - + f_full(11, max_level, k, h); - E(max_level, k, h) = f_full(12, max_level, k, h) + f_full(13, max_level, k, h) + f_full(14, max_level, k, h) - + f_full(15, max_level, k, h); - s(max_level, k, h) = xt::log( - ((gm - 1.) - * (E(max_level, k, h) - .5 * (xt::pow(qx(max_level, k, h), 2.) + xt::pow(qy(max_level, k, h), 2.)) / rho(max_level, k, h))) - / xt::pow(rho(max_level, k, h), gm)); - }); - } - - std::stringstream str; - str << "LBM_D2Q4_3_Euler_Reconstruction_" << ext << "_lmin_" << min_level << "_lmax-" << max_level << "_eps-" << eps << "_ite-" << ite; - - samurai::save(str.str().data(), - init_mesh, - rho_reconstructed, - qx_reconstructed, - qy_reconstructed, - E_reconstructed, - s_reconstructed, - rho, - qx, - qy, - E, - s, - level_); -} + auto MRadaptation = samurai::make_MRAdapt(f); -int main(int argc, char* argv[]) -{ - samurai::initialize(argc, argv); - - cxxopts::Options options("lbm_d2q4_3_Euler", - "Multi resolution for a D2Q4 LBM scheme for the " - "scalar advection equation"); - - options.add_options()("min_level", "minimum level", cxxopts::value()->default_value("2"))( - "max_level", - "maximum level", - cxxopts::value()->default_value("7"))("epsilon", "maximum level", cxxopts::value()->default_value("0.0001"))( - "ite", - "number of iteration", - cxxopts::value()->default_value("100"))("reg", "regularity", cxxopts::value()->default_value("0."))( - "config", - "Lax-Liu configuration", - cxxopts::value()->default_value("12"))("h, help", "Help"); - - try + double t = 0.; + samurai::times::timers.start("tloop"); + for (std::size_t nt = 0; nt <= N && nt < total_nb_ite; ++nt) { - auto result = options.parse(argc, argv); + std::cout << fmt::format("\n\t> Iteration {}, t: {}, dt: {} ", nt, t, dt ) << std::endl; - if (result.count("help")) + if (nt % nt_loadbalance == 0 && nt > 1) { - std::cout << options.help() << "\n"; + samurai::times::timers.start("tloop.lb"); + balancer.load_balance(mesh, f); + samurai::times::timers.stop("tloop.lb"); } - else - { - constexpr size_t dim = 2; - using Config = samurai::MROConfig; - - std::size_t min_level = result["min_level"].as(); - std::size_t max_level = result["max_level"].as(); - std::size_t total_nb_ite = result["ite"].as(); - double eps = result["epsilon"].as(); - double regularity = result["reg"].as(); - int configuration = result["config"].as(); - - // double lambda = 1./0.3; //4.0; - // double lambda = 1./0.2499; //4.0; - double lambda = 1. / 0.2; // This seems to work - double T = 0.25; // 0.3;//1.2; - - double sq_rho = 1.9; - double sxy_rho = 1.; - - double sq_q = 1.75; - double sxy_q = 1.; - double sq_e = 1.75; - double sxy_e = 1.; + samurai::times::timers.start("tloop.MRAdaptation"); + MRadaptation(mr_epsilon, mr_regularity); + samurai::times::timers.stop("tloop.MRAdaptation"); - if (configuration == 12) - { - T = .25; - } - else - { - T = .3; - // T = 0.1; - } - - // // This were the old test case (version 3) - // double sq = 1.75; - // double sxy = 2.; - // if (configuration == 12) { - // sxy = 1.5; - // T = 0.25; - // } - // else { - // sxy = 0.5; - // T = 0.3; - // } - - samurai::Box box({0, 0}, {1, 1}); - samurai::MROMesh mesh(box, min_level, max_level); - using mesh_id_t = typename samurai::MROMesh::mesh_id_t; - samurai::MROMesh mesh_ref{box, max_level, max_level}; - - using coord_index_t = typename samurai::MROMesh::coord_index_t; - auto pred_coeff = compute_prediction(min_level, max_level); - - // Initialization - auto f = init_f(mesh, configuration, lambda); // Adaptive scheme - auto f_ref = init_f(mesh_ref, configuration, lambda); // Reference scheme - - double dx = 1.0 / (1 << max_level); - double dt = dx / lambda; - - std::size_t N = static_cast(T / dt); - - std::string dirname("./LaxLiu/"); - std::string suffix("_Config_" + std::to_string(configuration) + "_min_" + std::to_string(min_level) + "_max_" - + std::to_string(max_level) + "_eps_" + std::to_string(eps)); - - // std::ofstream stream_number_leaves; - // stream_number_leaves.open - // (dirname+"number_leaves"+suffix+".dat"); - - // std::ofstream stream_number_cells; - // stream_number_cells.open (dirname+"number_cells"+suffix+".dat"); - - // std::ofstream stream_time_scheme_ref; - // stream_time_scheme_ref.open - // (dirname+"time_scheme_ref"+suffix+".dat"); - - // std::ofstream stream_number_leaves_ref; - // stream_number_leaves_ref.open - // (dirname+"number_leaves_ref"+suffix+".dat"); - - // std::ofstream stream_number_cells_ref; - // stream_number_cells_ref.open - // (dirname+"number_cells_ref"+suffix+".dat"); - - int howoften = 1; // How often is the solution saved ? - - auto update_bc_for_level = [](auto& field, std::size_t level) - { - update_bc_D2Q4_3_Euler_constant_extension(field, level); - }; + samurai::times::timers.start("tloop.LBM"); + one_time_step(f, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e); + samurai::times::timers.stop("tloop.LBM"); - auto MRadaptation = samurai::make_MRAdapt(f, update_bc_for_level); + samurai::times::timers.start("tloop.io"); + save_solution(f, mr_epsilon, nt, freq_out); + samurai::times::timers.stop("tloop.io"); - for (std::size_t nb_ite = 0; nb_ite <= N; ++nb_ite) - { - std::cout << std::endl << " Iteration number = " << nb_ite << std::endl; - - if (max_level > min_level) - { - MRadaptation(eps, regularity); - } - - if (nb_ite == N) - { - auto error_density = compute_error(f, f_ref, update_bc_for_level); - std::cout << std::endl << "#### Epsilon = " << eps << " error = " << error_density << std::endl; - save_solution(f, eps, nb_ite, std::string("final_")); - save_reconstructed(f, f_ref, update_bc_for_level, eps, nb_ite); - } - - // if (nb_ite % howoften == 0) { - // save_solution(f , eps, nb_ite/howoften, - // std::string("Config_")+std::to_string(configuration)); // - // Before applying the scheme - // } - - one_time_step(f, update_bc_for_level, pred_coeff, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e); - one_time_step(f_ref, update_bc_for_level, pred_coeff, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e); - - auto number_leaves = mesh.nb_cells(mesh_id_t::cells); - auto number_cells = mesh.nb_cells(); - - samurai::statistics("D2Q4444_Euler_Lax_Liu", mesh); - // stream_number_leaves< + +// CLI +#include + +// samurai +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +template +auto getLevel(Mesh& mesh) +{ + auto level = samurai::make_field("level", mesh); + + samurai::for_each_cell( mesh, [&]( auto & cell ) { + level[ cell ] = static_cast( cell.level ); + }); + + return level; +} + +template +auto init(Mesh& mesh, const double radius, const double x_center, const double y_center ) +{ + auto u = samurai::make_field("u", mesh); + + samurai::for_each_cell( mesh, [&](auto& cell) { + auto center = cell.center(); + double rad = 0.; + double z_center = 0.5; + + if constexpr ( dim == 3 ){ + rad = ((center[0] - x_center) * (center[0] - x_center) + + (center[1] - y_center) * (center[1] - y_center) + + (center[2] - z_center) * (center[2] - z_center) ); + }else{ + rad = ((center[0] - x_center) * (center[0] - x_center) + + (center[1] - y_center) * (center[1] - y_center) ); + } + if ( rad <= radius * radius) { + u[cell] = 1; + } else { + u[cell] = 0; + } + }); + + return u; +} + +template +auto initMultiCircles(Mesh& mesh, const std::vector &radius, const std::vector & centers ) +{ + auto u = samurai::make_field("u", mesh); + + samurai::for_each_cell( mesh, [&](auto& cell) { + u[ cell ] = 0.; + }); + + samurai::for_each_cell( mesh, [&](auto& cell) { + auto cc = cell.center(); + + for(size_t icerc=0; icerc +void upWind(int niter_benchmark, const Mesh_t & mesh, Field_t & u2, Field_t & unp1, double dt, Conv_t & conv) { + + for(int i=0; i +struct Config_test_load_balancing { + int niter_benchmark = 1; + int niter_loadbalance = 1; // number of iteration of load balancing + int ndomains = 1; // number of partition (mpi processes) + int rank = 0; // current mpi rank + std::size_t minlevel = 6; + std::size_t maxlevel = 8; + double dt = 0.1; + double mr_regularity = 1.0; + double mr_epsilon = 2.e-4; + xt::xtensor_fixed> minCorner = {0., 0., 0.}; + xt::xtensor_fixed> maxCorner = {1., 1., 1.}; + std::vector bubles_r; // bubles radius + std::vector> bubles_c; // bubles centers +}; + +template +Timers benchmark_loadbalancing( struct Config_test_load_balancing & conf, Load_Balancer_t && lb, Vel_t & velocity ){ + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + Timers myTimers; + + /** + * Initialize AMR mesh with spherical buble + */ + + samurai::Box box( conf.minCorner, conf.maxCorner ); + + Mesh_t mesh( box, conf.minlevel, conf.maxlevel ); + + // auto u = init( mesh, radius, x_center, y_center ); + myTimers.start("InitMesh"); + auto u = initMultiCircles( mesh, conf.bubles_r, conf.bubles_c ); + myTimers.stop("InitMesh"); + + auto lvl = getLevel( mesh ); + + samurai::make_bc>( u, 0. ); + + myTimers.start("mradapt"); + auto mradapt = samurai::make_MRAdapt( u ); + mradapt( conf.mr_epsilon, conf.mr_regularity ); + myTimers.stop("mradapt"); + + // samurai::save( "init_circle_"+std::to_string( ndomains)+"_domains", mesh, lvl ); + + myTimers.start( lb.getName() ); + for(int lb_iter=0; lb_iter( newmesh, radius, x_center, y_center ); + auto u2 = initMultiCircles( newmesh, conf.bubles_r, conf.bubles_c ); + samurai::save( "init2_circle_"+std::to_string( conf.ndomains )+"_domains", newmesh, u2 ); + + auto conv = samurai::make_convection_upwind( velocity ); + auto unp1 = samurai::make_field("unp1", newmesh); + + // myTimers.start( "update_ghost_mr_bf" ); + // for(int i=0; i conf; + + std::string filename = "ld_mesh_init"; + + double radius = 0.2, x_center = 0.5, y_center = 0.5; + bool multi = false; + + CLI::App app{"Load balancing test"}; + app.add_option("--iter-loadbalance", conf.niter_loadbalance, "Number of desired lb iteration") + ->capture_default_str()->group("Simulation parameters"); + app.add_option("--iter-bench", conf.niter_benchmark, "Number of iteration for bench")->capture_default_str()->group("Simulation parameters"); + app.add_option("--min-level", conf.minlevel, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--max-level", conf.maxlevel, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--mr-eps", conf.mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") + ->capture_default_str()->group("Multiresolution"); + app.add_option("--mr-reg", conf.mr_regularity,"The regularity criteria used by the multiresolution to " + "adapt the mesh")->capture_default_str()->group("Multiresolution"); + app.add_option("--filename", filename, "File name prefix")->capture_default_str()->group("Ouput"); + app.add_option("--radius", radius, "Bubble radius")->capture_default_str()->group("Simulation parameters"); + app.add_option("--xcenter", x_center, "Bubble x-axis center")->capture_default_str()->group("Simulation parameters"); + app.add_option("--ycenter", y_center, "Bubble y-axis center")->capture_default_str()->group("Simulation parameters"); + app.add_flag("--multi", multi, "Multiple bubles")->group("Simulation parameters"); + + CLI11_PARSE(app, argc, argv); + + conf.dt = cfl / ( 1 << conf.maxlevel ); + + // Convection operator + samurai::VelocityVector velocity; + velocity.fill( 1 ); + velocity( 1 ) = -1; + + boost::mpi::communicator world; + conf.rank = world.rank(); + conf.ndomains = static_cast( world.size() ); + + if( multi ) { + conf.bubles_r = { 0.2, 0.1, 0.05}; + conf.bubles_c = {{ 0.2, 0.2, 0.2 }, { 0.5, 0.5, 0.5 }, { 0.8, 0.8, 0.8 }}; + }else{ + conf.bubles_r = { radius }; + conf.bubles_c = {{ x_center, y_center, 0.5 }}; + } + + if( conf.rank == 0 ) std::cerr << "\n\t> Testing 'Interval propagation load balancer' " << std::endl; + + if( conf.ndomains > 1 ) { // load balancing cells by cells using interface propagation + auto times = benchmark_loadbalancing( conf, Diffusion_LoadBalancer_interval (), velocity ); + times.print(); + world.barrier(); + } + + if( conf.rank == 0 ) std::cerr << "\n\t> Testing 'Gravity-based cell exchange' load balancer' " << std::endl; + + if( conf.ndomains > 1 ) { // load balancing cells by cells using gravity + auto times = benchmark_loadbalancing( conf, Diffusion_LoadBalancer_cell (), velocity ); + times.print(); + world.barrier(); + } + + if( conf.rank == 0 ) std::cerr << "\n\t> Testing Morton_LoadBalancer_interval " << std::endl; + + if( conf.ndomains > 1 ) { // load balancing using SFC morton + auto times = benchmark_loadbalancing( conf, SFC_LoadBalancer_interval (), velocity ); + times.print(); + world.barrier(); + } + + if( conf.rank == 0 ) std::cerr << "\n\t> Testing Hilbert_LoadBalancer_interval " << std::endl; + + if( conf.ndomains > 1 ) { // load balancing using SFC hilbert + auto times = benchmark_loadbalancing( conf, SFC_LoadBalancer_interval (), velocity ); + times.print(); + world.barrier(); + } + + if( conf.rank == 0 ) std::cerr << "\n\t> Testing no loadbalancing (initial partitionning) " << std::endl; + + { + auto times = benchmark_loadbalancing( conf, Void_LoadBalancer (), velocity ); + times.print(); + world.barrier(); + } + + samurai::finalize(); + + return 0; +} \ No newline at end of file diff --git a/demos/loadbalancing/partition.cpp b/demos/loadbalancing/partition.cpp new file mode 100644 index 000000000..128c0605f --- /dev/null +++ b/demos/loadbalancing/partition.cpp @@ -0,0 +1,318 @@ +#include + +// CLI +#include + +// samurai +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +template +auto init(Mesh& mesh, const double radius, const double x_center, const double y_center ) +{ + auto u = samurai::make_field("u", mesh); + + samurai::for_each_cell( mesh, [&](auto& cell) { + auto center = cell.center(); + double rad = ((center[0] - x_center) * (center[0] - x_center) + + (center[1] - y_center) * (center[1] - y_center) ); + if ( rad <= radius * radius) { + u[cell] = 1; + } else { + u[cell] = 0; + } + }); + + return u; +} + +template +void detectInterface( std::vector & meshes, std::vector & graph ){ + + std::cerr << "\t> [detectInterface] Number of mesh : " << meshes.size() << std::endl; + + for(int irank=0; irank Processing domain # " << irank << std::endl; + + for( int nbi=0; nbi Neighbour process # " << neighbour_rank << std::endl; + + for( int idim=0; idim<2*dim; ++idim ){ + + xt::xtensor_fixed> stencil; + stencil.fill( 0 ); + + stencil[ idim / 2 ] = 1 - ( idim % 2 ) * 2; + + std::cerr << "\t\t\t> Stencil {" << stencil[ 0 ] << ", " << stencil[ 1 ] << "}" << std::endl; + + size_t nbCellsIntersection = 0, nbIntervalIntersection = 0; + int minlevel = meshes[ neighbour_rank ].min_level(); + int maxlevel = meshes[ neighbour_rank ].max_level(); + + for( int level=minlevel; level<=maxlevel; ++level ) { + + // for each level look at level -1 / 0 / +1 + int minlevel_proj = meshes[ irank ].min_level(); + int maxlevel_proj = meshes[ irank ].max_level(); + for(size_t projlevel=std::max(minlevel_proj, level-1); projlevel<=std::min(maxlevel_proj, level+1); ++projlevel ){ + std::cerr << "\t\t\t> Looking intersection " << level << " onto level " << projlevel << std::endl; + + // intersection avec myself + // translate neighbour from dir (hopefully to current) + auto set = translate( meshes[ neighbour_rank ][ level ], stencil ); + auto intersect = intersection( set, meshes[ irank ][ projlevel ] ).on( projlevel ); + + size_t nbInter_ = 0, nbCells_ = 0; + intersect([&]( auto & i, auto & index ) { + nbInter_ += 1; + nbCells_ += i.size(); + }); + + // we get more interval / cells, than wanted because neighbour has bigger cells + if( nbInter_ > 0 && projlevel > level ){ + std::cerr << "\t\t\t> Too much cell selected ... " << std::endl; + auto set_ = translate( meshes[ irank ][ projlevel ], stencil ); + auto diff_ = difference( intersect, set_ ); + + nbInter_ = 0; + nbCells_ = 0; + diff_([&]( auto & i, auto & index ) { + nbInter_ += 1; + nbCells_ += i.size(); + }); + + } + + std::cerr << "\t\t\t> nbInter_ : " << nbInter_ << ", nbCells_ " << nbCells_ << std::endl; + + nbCellsIntersection = nbCells_ ; + nbIntervalIntersection = nbInter_ ; + + } + + } + std::cerr << "\t\t> Total Ncells intersected " << nbCellsIntersection << ", nb interval : " << nbIntervalIntersection << std::endl; + + } + + } + + } + +} + +int main( int argc, char * argv[] ){ + + samurai::initialize(argc, argv); + Timers myTimers; + + constexpr int dim = 2; + int ndomains = 1; + std::size_t minLevel = 3, maxLevel = 8; + double mr_regularity = 1.0, mr_epsilon = 2.e-4; + xt::xtensor_fixed> minCorner = {0., 0.}; + xt::xtensor_fixed> maxCorner = {1., 1.}; + std::string filename = "ld_mesh_init"; + + double radius = 0.2, x_center = 0.5, y_center = 0.5; + + CLI::App app{"Load balancing test"}; + app.add_option("--ndomains", ndomains, "Number of desired domains")->capture_default_str()->group("Simulation parameters"); + app.add_option("--min-corner", minCorner, "The min corner of the box")->capture_default_str()->group("Simulation parameters"); + app.add_option("--max-corner", maxCorner, "The max corner of the box")->capture_default_str()->group("Simulation parameters"); + app.add_option("--min-level", minLevel, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--max-level", maxLevel, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution"); + app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh") + ->capture_default_str()->group("Multiresolution"); + app.add_option("--mr-reg", mr_regularity,"The regularity criteria used by the multiresolution to " + "adapt the mesh")->capture_default_str()->group("Multiresolution"); + app.add_option("--filename", filename, "File name prefix")->capture_default_str()->group("Ouput"); + app.add_option("--radius", radius, "Bubble radius")->capture_default_str()->group("Simulation parameters"); + app.add_option("--xcenter", x_center, "Bubble x-axis center")->capture_default_str()->group("Simulation parameters"); + app.add_option("--ycenter", y_center, "Bubble y-axis center")->capture_default_str()->group("Simulation parameters"); + + CLI11_PARSE(app, argc, argv); + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = typename Mesh_t::ca_type; + + /** + * Initialize AMR mesh with spherical buble + */ + myTimers.start("InitMesh"); + // samurai::Box box( minCorner, maxCorner ); + + // Mesh_t mesh(box, minLevel, maxLevel); + + // auto u = init( mesh, radius, x_center, y_center ); + // samurai::make_bc(u, 0.); + // auto mradapt = samurai::make_MRAdapt( u ); + // mradapt( mr_epsilon, mr_regularity ); + + std::vector clists( 3 ); + std::vector carrays( 3 ); + + clists[0][1][{0}].add_interval({1, 4}); + clists[0][1][{1}].add_interval({1, 4}); + clists[0][1][{2}].add_interval({0, 4}); + clists[0][1][{3}].add_interval({0, 4}); + clists[0][2][{0}].add_interval({0, 2}); + clists[0][2][{1}].add_interval({0, 2}); + clists[0][2][{2}].add_interval({0, 2}); + clists[0][2][{3}].add_interval({0, 2}); + + // subdomain 2 + clists[1][0][{2}].add_interval({0, 4}); + clists[1][0][{3}].add_interval({0, 4}); + + // subdomain 3 + clists[2][0][{0}].add_interval({2, 4}); + clists[2][0][{1}].add_interval({2, 4}); + + carrays[0] = { clists[0], true }; + carrays[1] = { clists[1], true }; + carrays[2] = { clists[2], true }; + + myTimers.stop("InitMesh"); + + std::vector graph( 3 ); + { // fill graph info + + // find a way to compute this + graph[ 0 ].neighbour = {1, 2}; + graph[ 1 ].neighbour = {0, 2}; + graph[ 2 ].neighbour = {0, 1}; + + // fill "current" load ----> no MPI comm, local info + for(size_t i=0; i<3; ++i ){ + graph[ i ]._load = carrays[ i ].nb_cells(); + } + + // fill neighbour load -----> this will imply MPI comm + for(size_t i=0; i<3; ++i ){ + graph[ i ].load.resize( graph[ i ].neighbour.size() ); + for(size_t ni=0; ni( carrays, graph ); + + + CellArray_t mesh = { global, true }; + + auto init_rank = samurai::make_field( "init_rank", mesh); + + { // initialize fake_rank + + for(size_t m_=0; m_( m_ ); + }); + } + } + + } + + // output mesh + SAMURAI_LOG( "\t> Initial configurtion : " + filename ); + samurai::save( filename, mesh, init_rank ); + + myTimers.start( "load_balance_fluxes" ); + compute_load_balancing_fluxes( graph ); + myTimers.start( "load_balance_fluxes" ); + + // explicitly call load_balancing(); + + // std::cerr << "\n\n" << std::endl; + // SAMURAI_LOG( "[partition] Explicitly calling load_balancing() ... "); + + // for(int il=0; il<3; ++il ){ + // myTimers.start("load-balancing"); + // mesh.force_load_balancing(); + // myTimers.stop("load-balancing"); + + // // output mesh + // std::cerr << "\t> Dumping mesh to file : " << "load-balanced" << std::endl; + // samurai::save( "load-balanced_"+std::to_string(il), mesh, u ); + // } + + auto sfc_morton_rank = samurai::make_field( "sfc_morton_rank", mesh); + + myTimers.start( "balance_morton" ); + // perform_load_balancing_SFC( mesh, ndomains, sfc_morton_rank ); + myTimers.stop( "balance_morton" ); + + + auto diffusion_rank = samurai::make_field( "diffusion_rank", mesh); + + { // initialize fake_rank + + for(size_t m_=0; m_( m_ ); + }); + } + } + + } + + samurai::save( "load_balanced_diff_init_"+std::to_string( ndomains), mesh, sfc_morton_rank, diffusion_rank ); + + for_each_cell( mesh, [&](const auto & cell ){ + diffusion_rank[ cell ] = -1; + }); + + myTimers.start( "balance_diffusion" ); + perform_load_balancing_diffusion( mesh, carrays, ndomains, graph, diffusion_rank ); + myTimers.stop( "balance_diffusion" ); + + filename = "load_balanced_diff_" + std::to_string( ndomains); + SAMURAI_LOG( "\t> Generated file : " + filename ); + samurai::save( filename, mesh, sfc_morton_rank, diffusion_rank ); + + + myTimers.print(); + samurai::finalize(); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/include/samurai/assertLogTrace.hpp b/include/samurai/assertLogTrace.hpp new file mode 100644 index 000000000..a2f771c20 --- /dev/null +++ b/include/samurai/assertLogTrace.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include + +#define SAMURAI_ASSERT(condition, msg) \ +do { \ + if (! (condition)) { \ + std::cerr << "Assertion failed \nin " << __FILE__ \ + << "\n @line " << __LINE__ << ": " << msg << std::endl; \ + std::terminate(); \ + } \ +} while (false) + +//#ifdef NDEBUG +#define SAMURAI_LOG( msg ) do { std::cerr << "SMR::Log:: " \ + << msg << std::endl; } while (0) + +#define SAMURAI_TRACE( msg ) do { std::cerr << "SMR::Trace[line " << __LINE__ << "] :" \ + << msg << std::endl; } while (0) +//#else +//#define MGS_LOG( msg ) +//#endif diff --git a/include/samurai/boundary.hpp b/include/samurai/boundary.hpp index 3e0fcdcb7..48531d56b 100644 --- a/include/samurai/boundary.hpp +++ b/include/samurai/boundary.hpp @@ -10,6 +10,9 @@ namespace samurai using mesh_id_t = typename Mesh::mesh_id_t; auto& cells = mesh[mesh_id_t::cells][level]; + // auto& domain = mesh.domain(); + // REBASE FIXME : I don't know if we need to override domain + //auto& domain = mesh.subdomain(); auto max_level = domain.level(); // domain.level();//mesh[mesh_id_t::cells].max_level(); auto one_interval = layer_width << (max_level - level); diff --git a/include/samurai/cell_array.hpp b/include/samurai/cell_array.hpp index ce7d625c7..f44e5886c 100644 --- a/include/samurai/cell_array.hpp +++ b/include/samurai/cell_array.hpp @@ -96,10 +96,8 @@ namespace samurai template const interval_t& get_interval(std::size_t level, const interval_t& interval, T... index) const; - template const interval_t& get_interval(std::size_t level, const interval_t& interval, const xt::xexpression& index) const; - template const interval_t& get_interval(std::size_t level, const xt::xexpression& coord) const; @@ -120,6 +118,9 @@ namespace samurai std::size_t nb_cells() const; std::size_t nb_cells(std::size_t level) const; + std::size_t nb_intervals(std::size_t dim) const; + std::size_t nb_intervals(std::size_t dim, std::size_t level) const; + std::size_t max_level() const; std::size_t min_level() const; @@ -384,6 +385,38 @@ namespace samurai return m_cells[level].nb_cells(); } + /** + * Return the number of intervals for a given dimension over the levels. + * + * @param d The dimension where to compute the number of intervals + * @return The number of intervals for the given dimension + */ + template + inline std::size_t CellArray::nb_intervals(std::size_t d) const + { + assert(d < dim); + std::size_t size = 0; + for (std::size_t level = 0; level <= max_size; ++level) + { + size += m_cells[level].nb_intervals(d); + } + return size; + } + + /** + * Return the number of intervals for a given dimension and a given level. + * + * @param d The dimension where to compute the number of intervals + * @param level The level where to compute the number of intervals + * @return The number of intervals for the given dimension + */ + template + inline std::size_t CellArray::nb_intervals(std::size_t d, std::size_t level) const + { + assert(d < dim); + return m_cells[level].nb_intervals(d); + } + /** * Return the maximum level where the array entry is not empty. */ diff --git a/include/samurai/cell_list.hpp b/include/samurai/cell_list.hpp index 24a5da7ef..7cb128d54 100644 --- a/include/samurai/cell_list.hpp +++ b/include/samurai/cell_list.hpp @@ -34,6 +34,17 @@ namespace samurai const lcl_type& operator[](std::size_t i) const; lcl_type& operator[](std::size_t i); + inline bool empty() const { + + bool rst = true; + + for(const auto & lcl : m_cells ){ + rst &= lcl.empty(); + } + + return rst; + } + void to_stream(std::ostream& os) const; auto& origin_point() const; diff --git a/include/samurai/hdf5.hpp b/include/samurai/hdf5.hpp index 94bbfc8ad..9d4a3b19b 100644 --- a/include/samurai/hdf5.hpp +++ b/include/samurai/hdf5.hpp @@ -93,7 +93,7 @@ namespace samurai { std::size_t index = 0; for_each_cell(submesh, - [&](auto cell) + [&](const auto & cell) { if constexpr (Field::size == 1) { @@ -136,7 +136,7 @@ namespace samurai std::size_t id = 0; std::size_t index = 0; for_each_cell(mesh, - [&](auto cell) + [&](const auto & cell) { std::array a; auto start_corner = cell.corner(); diff --git a/include/samurai/hilbert.hpp b/include/samurai/hilbert.hpp new file mode 100644 index 000000000..0ca52adca --- /dev/null +++ b/include/samurai/hilbert.hpp @@ -0,0 +1,425 @@ +#pragma once + +#include +#include "sfc.hpp" + +class Hilbert : public SFC { + + private: + // const int _nbits = sizeof(int) * 8; + + // State based algorithm + + // const std::vector>> _2D_STATE = {{{1, 0, 2, 3}, {0, 0, 2, 2}}, + // {{0, 3, 2, 1}, {1, 3, 1, 3}}, + // {{2, 1, 0, 3}, {3, 1, 3, 1}}, + // {{0, 1, 3, 2}, {2, 2, 0, 0}}}; + + // const std::vector>> _3D_STATE = {{{1,2,0,6,11,4,5,6,10,4,7,10}, + // {0,0,0,2,4,6,4,6,2,2,4,6}}, + // {{2,6,9,0,11,4,7,1,3,4,2,3}, + // {1,7,3,3,3,5,7,7,5,1,5,1}}, + // {{3,0,10,6,0,8,5,6,1,8,11,2}, + // {3,1,7,1,5,1,3,5,3,5,7,7}}, + // {{2,7,9,11,7,8,3,10,1,8,2,6}, + // {2,6,4,0,2,2,0,4,4,6,6,0}}, + // {{4,8,1,9,5,0,1,9,10,2,7,10}, + // {7,3,1,5,7,7,5,1,1,3,3,5}}, + // {{5,8,1,0,9,6,1,4,3,7,5,3}, + // {6,4,2,4,0,4,6,0,6,0,2,2}}, + // {{3,0,11,9,0,10,11,9,5,2,8,4}, + // {4,2,6,6,6,0,2,2,0,4,0,4}}, + // {{5,7,11,8,7,6,11,10,9,3,5,4}, + // {5,5,5,7,1,3,1,3,7,7,1,3}}}; + + // const std::vector>> _3D_STATE_r = {{{1,2,0,11,9,10,3,4,5,7,8,6}, + // {0,0,0,3,5,6,3,5,6,5,6,3}}, + // {{2,0,1,6,7,8,11,9,10,4,5,3}, + // {1,2,4,2,7,2,7,4,4,1,7,1}}, + // {{2,0,1,6,7,8,11,9,10,4,5,3}, + // {3,6,5,0,3,3,6,6,0,0,5,5}}, + // {{3,8,9,0,11,6,5,10,1,2,7,4}, + // {2,4,1,1,1,7,2,7,2,4,4,7}}, + // {{3,8,9,0,11,6,5,10,1,2,7,4}, + // {6,5,3,5,0,5,0,3,3,6,0,6}}, + // {{5,7,11,9,0,4,1,6,3,8,2,10}, + // {7,7,7,4,2,1,4,2,1,2,1,4}}, + // {{5,7,11,9,0,4,1,6,3,8,2,10}, + // {5,3,6,6,6,0,5,0,5,3,3,0}}, + // {{4,6,10,8,5,0,7,1,9,3,11,2}, + // {4,1,2,7,4,4,1,1,7,7,2,2}}}; + + + // def __init__(self, indexbits=16, dim=2): + // """ + // initialize hilbert class object + // """ + // assert indexbits > 0 + // assert 2 <= dim <= 3, "Dimension expected to be 2 or 3" + + // self._max_hindex = (2 ** indexbits) - 1 + // self._nbits = indexbits + // self._dimension = dim + + // # print("\t> Max H-index: 2^{}-1 = {} !".format(indexbits, self._max_hindex)) + + template + Coord_t AxestoTranspose( const Coord_t & c ) const { + + constexpr int _nbits = sizeof( uint32_t ) * 8; + + Coord_t _transposed; + for(size_t idim=0; idim 1 ){ + uint32_t p = q - 1; + for( size_t j = 0; j>= 1; + } + + // Gray code + for( int i=1; i 1 ){ + if( _transposed( dim - 1 ) & q ){ + t ^= ( q - 1 ); + } + q >>= 1; + } + + for(int i=0; i> 1 + // for i in range(self._dimension-1, 0, -1): + // x[i] ^= x[i-1] + // x[0] ^= t + + // q = 2 + // while q != m: + // p = q - 1 + // for i in range(self._dimension-1, -1, -1): + // if x[i] & q: + // x[0] ^= p + // else: + // t = (x[0] ^ x[i]) & p + // x[0] ^= t + // x[i] ^= t + // q <<= 1 + + public: + + inline std::string getName() const { return "Hilbert"; } + + template + inline SFC_key_t getKey_2D_impl( const Coord_t & lc ) const { + // SAMURAI_TRACE( "Hilbert::getKey_2D_impl Entering" ); + + // Be sure it is positive, shift must be done before + assert( lc( 0 ) >= 0 ); + assert( lc( 1 ) >= 0 ); + + constexpr int dim = 2; + constexpr int _nbits = sizeof( lc( 0 ) ) * 8; + + // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1) << std::endl; + xt::xtensor_fixed> coord = AxestoTranspose( lc ); + // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1) << std::endl; + + int nb = ( _nbits * dim ) - 1; + SFC_key_t tmp = 0; + + for(int jj=_nbits-1; jj>= 0; --jj){ + for( int ii=0; ii> jj; + // std::cerr << " jj : " << jj << ", a : " << a ; + + if( ( a & static_cast( 1 ) ) == 1 ){ + SFC_key_t ad = ( static_cast( 1 ) << nb ); + // std::cerr << " nb : " << nb; + tmp += ad; + } + nb --; + } + } + + return tmp; + } + + template + inline SFC_key_t getKey_3D_impl( const Coord_t & lc ) const { + // SAMURAI_TRACE( "Hilbert::getKey_2D_impl Entering" ); + + // Be sure it is positive, shift must be done before + assert( lc( 0 ) >= 0 ); + assert( lc( 1 ) >= 0 ); + assert( lc( 2 ) >= 0 ); + + constexpr int dim = 3; + constexpr int _nbits = sizeof( lc( 0 ) ) * 8; + + // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1) << std::endl; + xt::xtensor_fixed> coord = AxestoTranspose( lc ); + // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1) << std::endl; + + int nb = ( _nbits * dim ) - 1; + SFC_key_t tmp = 0; + + for(int jj=_nbits-1; jj>= 0; --jj){ + for( int ii=0; ii> jj; + // std::cerr << " jj : " << jj << ", a : " << a ; + + if( ( a & static_cast( 1 ) ) == 1 ){ + SFC_key_t ad = ( static_cast( 1 ) << nb ); + // std::cerr << " nb : " << nb; + tmp += ad; + } + nb --; + } + } + + return tmp; + } + + /* + * Return the hilbert index from logical coordinate (i,j) or (i,j,k) + * + * Parameters: + * lc : structure containing logical coordinate + * level : used for multiresolution + * Return : + * key : hilbert key + */ + // inline SFC_key_t getKey_2D_impl_state( const Logical_coord & lc, int lvl ) const { + // SAMURAI_TRACE( "Hilbert::getKey_2D_impl Entering " ); + + // SFC_key_t la_clef = 0; + + // constexpr int dim = 2; + // constexpr int twotondim = 1 << dim; + + // std::vector> ind_bits( lvl, std::vector( dim ) ); + // std::vector h_digits( lvl, 0 ); + + // SFC_key_t xyz[dim] = { lc.i, lc.j }; + + // // Set ind_bits for current point + // for (int ibit = 0; ibit < lvl; ++ibit) { + // for( int idim=0; idim> ibit) & 1; + // } + // } + // SAMURAI_TRACE( "Hilbert::getKey_2D_impl set ind_bits " ); + + // // Compute Hilbert key bits + // int cur_state = 0; + // int new_state; + // for(int ibit=lvl-1; ibit>-1; --ibit) { + // SAMURAI_LOG( "ibit : " + std::to_string( ibit ) ); + + // // Compute s_digit by interleaving bits + // int s_digit = 0; + // for(int idim=0; idim + // inline SFC_key_t getKey_3D_impl( const Coord_t & lc, int lvl ) const { + // SFC_key_t la_clef = 0; + + // constexpr int dim = 3; + + // int twotondim = 1 << dim; + // int ind_bits[lvl][dim]; + // int h_digits[lvl]; + + // SFC_key_t xyz[ dim ] = { lc( 0 ), lc( 1 ), lc( 2 ) }; + + // // Set ind_bits for current point + // for (int ibit = 0; ibit < lvl; ++ibit) { + // for(size_t idim=0; idim> ibit ) & 1; + // } + // } + + // // Compute Hilbert key bits + // int cur_state = 0; + // int new_state; + // for(int ibit=lvl-1; ibit>-1; --ibit) { + + // // Compute s_digit by interleaving bits + // int s_digit = 0; + // for(size_t idim=0; idim( std::pow(twotondim, ibit) * h_digits[ibit] ); + // } + + // return la_clef; + // } + + +}; + +// /* +// * Return the logical coordinate from a hilbert key encoded in long double. +// * +// * Parameters: +// * long double hkey : hilbert key on float128 +// * order : hilbert order (level) +// * dim : dimension should be 3D here +// */ +// inline std::vector get_logical_from_hilbert_3D(long double hkey, int order, int dim){ + +// int twotondim = 1 << dim; +// std::vector ind_array(dim); +// std::vector h_digits(order); +// std::vector> ind_bits( order, std::vector (dim)); + +// // Compute Hilbert key bits +// for(int ibit=0; ibit((hkey / std::pow(twotondim, ibit))) % twotondim); +// hkey -= h_digits[ibit] * std::pow(twotondim, ibit); +// } + +// // Compute indices bits +// int cur_state = 0, new_state, s_digit = 0; +// for(int ibit=order-1; ibit>-1; --ibit) { + +// // Compute the new s_digit from the state diagram +// new_state = _3D_STATE_r[h_digits[ibit]][0][cur_state]; +// s_digit = _3D_STATE_r[h_digits[ibit]][1][cur_state]; +// cur_state = new_state; + +// // Compute ind_bitd +// for (int idim = 0; idim < dim; ++idim) { +// ind_bits[ibit][idim] = (s_digit >> (dim - 1 - idim)) & 1; +// } +// } + +// // Set indices for current key +// for(int ibit=0; ibit( (hkey / std::pow( twotondim, ibit )) ) % twotondim); +// hkey -= h_digits[ibit] * std::pow(twotondim, ibit); +// } + +// // Compute indices bits +// int cur_state = 0, new_state, s_digit = 0; +// for(int ibit=level-1; ibit>-1; --ibit) { + +// // Compute the new s_digit from the state diagram +// new_state = _HILBERT_3D_STATE_r[h_digits[ibit]][0][cur_state]; +// s_digit = _HILBERT_3D_STATE_r[h_digits[ibit]][1][cur_state]; +// cur_state = new_state; + +// // Compute ind_bitd +// for (int idim = 0; idim < dim; ++idim) { +// ind_bits[ibit][idim] = (s_digit >> (dim - 1 - idim)) & 1; +// } +// } + +// // Set indices for current key +// for(int ibit=0; ibit...>, void>> + std::size_t get_interval_location(const interval_t& interval, T... index) const; + template + std::size_t get_interval_location(const interval_t& interval, const xt::xexpression& index) const; + template + std::size_t get_interval_location(const xt::xexpression& coord) const; + // get_interval template ...>, void>> const interval_t& get_interval(const interval_t& interval, T... index) const; @@ -151,6 +159,9 @@ namespace samurai //// Gives the total number of intervals auto nb_intervals() const; + //// Gives the number of intervals for a given dimension + std::size_t nb_intervals(std::size_t d) const; + //// Gives the number of cells std::size_t nb_cells() const; @@ -514,6 +525,37 @@ namespace samurai return const_reverse_iterator(cbegin()); } + /** + * Return the index in the x-interval array satisfying the input parameters + * + * @param interval The desired x-interval. + * @param index The desired indices for the other dimensions. + */ + template + template + inline std::size_t LevelCellArray::get_interval_location(const interval_t& interval, T... index) const + { + return static_cast(find(*this, {interval.start, index...})); + } + + template + template + inline std::size_t LevelCellArray::get_interval_location(const interval_t& interval, const xt::xexpression& index) const + { + xt::xtensor_fixed> point; + point[0] = interval.start; + xt::view(point, xt::range(1, _)) = index; + return static_cast(find(*this, point)); + } + + template + template + inline std::size_t LevelCellArray::get_interval_location(const xt::xexpression& coord) const + { + xt::xtensor_fixed> coord_array = coord; + return static_cast(find(*this, coord_array)); + } + /** * Return the x-interval satisfying the input parameters * @@ -640,6 +682,13 @@ namespace samurai return s; } + template + inline std::size_t LevelCellArray::nb_intervals(std::size_t d) const + { + assert(d < dim); + return m_cells[d].size(); + } + template inline std::size_t LevelCellArray::nb_cells() const { diff --git a/include/samurai/load_balancing.hpp b/include/samurai/load_balancing.hpp new file mode 100644 index 000000000..3c06ed3f5 --- /dev/null +++ b/include/samurai/load_balancing.hpp @@ -0,0 +1,1910 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "algorithm.hpp" +#include "algorithm/utils.hpp" +#include "hilbert.hpp" +#include "mesh.hpp" +#include "morton.hpp" +#include "mr/mesh.hpp" + +// statistics +#ifdef WITH_STATS +#include +#endif + +#ifdef SAMURAI_WITH_MPI +#include +#endif + + +#ifdef SAMURAI_WITH_MPI +namespace samurai +{ + + // namespace load_balance + // { + + // template + // Mesh_t merge(Mesh_t& mesh, const CellArray_t& lca) + // { + // using cl_type = typename Mesh_t::cl_type; + + // auto& refmesh = mesh[Mesh_t::mesh_id_t::cells]; + + // auto minlevel = std::min(refmesh.min_level(), lca.min_level()); + // auto maxlevel = std::max(refmesh.max_level(), lca.max_level()); + + // cl_type cl; + // for (size_t ilvl = minlevel; ilvl <= maxlevel; ++ilvl) + // { + // auto un = samurai::union_(refmesh[ilvl], lca[ilvl]); + + // un( + // [&](auto& interval, auto& indices) + // { + // cl[ilvl][indices].add_interval(interval); + // }); + // } + + // return Mesh_t(cl, minlevel, maxlevel); + // } + + // template + // Mesh_t remove(Mesh_t& mesh, CellArray_t& lca) + // { + // using cl_type = typename Mesh_t::cl_type; + + // auto& refmesh = mesh[Mesh_t::mesh_id_t::cells]; + + // auto minlevel = std::min(refmesh.min_level(), lca.min_level()); + // auto maxlevel = std::max(refmesh.max_level(), lca.max_level()); + + // // remove cells + // cl_type cl; + // size_t diff_ncells = 0; + // for (size_t ilvl = minlevel; ilvl <= maxlevel; ++ilvl) + // { + // auto diff = samurai::difference(refmesh[ilvl], lca[ilvl]); + + // diff( + // [&](auto& interval, auto& index) + // { + // cl[ilvl][index].add_interval(interval); + // diff_ncells += interval.size(); + // }); + // } + + // // new mesh for current process + // return Mesh_t(cl, minlevel, maxlevel); + // } + + // } + + struct MPI_Load_Balance + { + int32_t _load; + std::vector neighbour; + std::vector load; + std::vector fluxes; + }; + + enum Distance_t + { + L1, + L2, + LINF, + GRAVITY + }; + + enum Direction_t + { + FACE, + DIAG, + FACE_AND_DIAG + }; + + enum BalanceElement_t + { + CELL, + INTERVAL + }; + + enum Weight{ + OnSmall, + OnLarge, + None + }; + + static const double load_balancing_threshold = 0.01; // 0.03141592; // 2.5 % + // static const std::vector load_balancing_cell_weight = { 1., 0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, + // 0.0078125, 0.00390625, 0.001953125, 0.0009765625, + // 0.00048828125, 0.000244140625, 0.0001220703125 }; + + /** + * Compute distance base on different norm. + */ + + template + static inline double distance_l2(const Coord_t& d1, const Coord_t& d2) + { + double dist = 0.; + for (size_t idim = 0; idim < dim; ++idim) + { + double d = d1(idim) - d2(idim); + dist += d * d; + } + return std::sqrt(dist); + } + + template + static inline double distance_inf(const Coord_t& d1, const Coord_t& d2) + { + double dist = 0.; + for (size_t idim = 0; idim + static inline double distance_l1(const Coord_t& d1, const Coord_t& d2) + { + double dist = 0.; + for (size_t idim = 0; idim < dim; ++idim) + { + dist += std::abs(d1(idim) - d2(idim)); + } + return dist; + } + + /** + * Compute the load of the current process based on intervals or cells. It uses the + * mesh_id_t::cells to only consider leaves. + */ + template + static std::size_t cmptLoad(const Mesh_t& mesh) + { + using mesh_id_t = typename Mesh_t::mesh_id_t; + + const auto& current_mesh = mesh[mesh_id_t::cells]; + + std::size_t current_process_load = 0; + + if constexpr (elem == BalanceElement_t::CELL) + { + // cell-based load without weight. + samurai::for_each_interval(current_mesh, + [&]([[maybe_unused]] std::size_t level, const auto& interval, [[maybe_unused]] const auto& index) + { + current_process_load += interval.size(); // * load_balancing_cell_weight[ level ]; + }); + } + else + { + // interval-based load without weight + for (std::size_t level = current_mesh.min_level(); level <= current_mesh.max_level(); ++level) + { + current_process_load += current_mesh[level].shape()[0]; // only in x-axis ; + } + } + + return current_process_load; + } + + /** + * Compute fluxes based on load computing stategy based on graph with label + * propagation algorithm. Return, for the current process, the flux in term of + * load, i.e. the quantity of "load" to transfer to its neighbours. If the load + * is negative, it means that the process (current) must send load to neighbour, + * if positive it means that it must receive load. + * + * This function use 2 MPI all_gather calls. + * + */ + template + std::vector cmptFluxes( Mesh_t& mesh, const std::vector & neighbourhood, int niterations ) + { + + boost::mpi::communicator world; + + std::ofstream logs; + logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app ); + + size_t n_neighbours = neighbourhood.size(); + + // load of current process + int my_load = static_cast( cmptLoad( mesh ) ); + logs << "> Current process load : " << my_load << std::endl; + + // fluxes between processes + std::vector fluxes(n_neighbours, 0); + + // load of each process (all processes not only neighbours) + std::vector loads; + + // numbers of neighbours processes for each process, used for weighting fluxes + std::vector neighbourhood_n_neighbours; + + // number of neighbours for each process + boost::mpi::all_gather(world, neighbourhood.size(), neighbourhood_n_neighbours); + + // get "my_load" from other processes + int nt = 0; + while( nt < niterations ){ + boost::mpi::all_gather(world, my_load, loads); + + // compute updated my_load for current process based on its neighbourhood + int my_load_new = my_load; + for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i) + { + std::size_t neighbour_rank = static_cast( neighbourhood[ n_i ] ); + int neighbour_load = loads[ neighbour_rank ]; + double diff_load = static_cast( neighbour_load - my_load_new); + + std::size_t nb_neighbours_neighbour = neighbourhood_n_neighbours[ neighbour_rank ]; + + double weight = 1. / static_cast( std::max( n_neighbours, nb_neighbours_neighbour ) + 1 ); + + // if transferLoad < 0 -> need to send data, if transferLoad > 0 need to receive data + int transfertLoad = static_cast( std::lround( weight * diff_load )) ; + + fluxes[ n_i ] += transfertLoad; + + // my_load_new += transfertLoad; + + my_load += transfertLoad; + } + + logs << fmt::format("\t> it {}, neighbours : ", nt) ; + for( size_t in=0; in( neighbourhood[ in ] ) ] << "], "; + logs << std::endl << "\t\t>fluxes : "; + for( size_t in=0; in( neighbourhood[ n_i ] ); + int abs_diff = std::abs( fluxes[ n_i ] ); + int threshold_neigh = static_cast( load_balancing_threshold * loads[ neighbour_rank ] ); + int threshold_curr = static_cast( load_balancing_threshold * my_load ); + + if( abs_diff < threshold_curr && abs_diff < threshold_neigh ){ + fluxes[ n_i ] = 0; + } + + } + + return fluxes; + } + + /** + * Compute fluxes based on load computing stategy based on graph with label + * propagation algorithm. Return, for the current process, the flux in term of + * load, i.e. the quantity of "load" to transfer to its neighbours. If the load + * is negative, it means that the process (current) must send load to neighbour, + * if positive it means that it must receive load. + * + * This function use 2 MPI all_gather calls. + * + */ + template + std::vector cmptFluxes(Mesh_t& mesh, int niterations) + { + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + + boost::mpi::communicator world; + + std::ofstream logs; + logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app ); + + // give access to geometricaly neighbour process rank and mesh + std::vector& neighbourhood = mesh.mpi_neighbourhood(); + size_t n_neighbours = neighbourhood.size(); + + // load of current process + int my_load = static_cast(cmptLoad(mesh)); + + logs << "> Current process load : " << my_load << std::endl; + + // fluxes between processes + std::vector fluxes(n_neighbours, 0); + + // load of each process (all processes not only neighbours) + std::vector loads; + + // numbers of neighbours processes for each process, used for weighting fluxes + std::vector neighbourhood_n_neighbours; + + // number of neighbours for each process + boost::mpi::all_gather(world, neighbourhood.size(), neighbourhood_n_neighbours); + + // get "my_load" from other processes + int nt = 0; + while( nt < niterations ){ + boost::mpi::all_gather(world, my_load, loads); + + // compute updated my_load for current process based on its neighbourhood + int my_load_new = my_load; + for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i) + { + std::size_t neighbour_rank = static_cast(neighbourhood[n_i].rank); + int neighbour_load = loads[neighbour_rank]; + double diff_load = static_cast( neighbour_load - my_load_new); + + std::size_t nb_neighbours_neighbour = neighbourhood_n_neighbours[neighbour_rank]; + + double weight = 1. / static_cast(std::max(n_neighbours, nb_neighbours_neighbour) + 1); + + // if transferLoad < 0 -> need to send data, if transferLoad > 0 need to receive data + int transfertLoad = static_cast(std::lround(weight * diff_load)); + + fluxes[n_i] += transfertLoad; + + // my_load_new += transfertLoad; + + my_load += transfertLoad; + } + + logs << fmt::format("\t> it {}, neighbours : ", nt) ; + for( size_t in=0; in( neighbourhood[ in ].rank ) ] << "], "; + logs << std::endl << "\t> fluxes : "; + for( size_t in=0; in New theoretical load : " << my_load << std::endl; + + nt ++ ; + } + + // apply threshold, if the difference is smaller than #load_balancing_threshold of the number of cells, + // we do not load balance those processes + for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i) + { + std::size_t neighbour_rank = static_cast( neighbourhood[ n_i ].rank ); + int abs_diff = std::abs( fluxes[ n_i ] ); + int threshold_neigh = static_cast( load_balancing_threshold * loads[ neighbour_rank ] ); + int threshold_curr = static_cast( load_balancing_threshold * my_load ); + + if( abs_diff < threshold_curr && abs_diff < threshold_neigh ){ + fluxes[ n_i ] = 0; + } + + } + + return fluxes; + } + + /** Is gravity the key ? That would be awesome =) + * This is not a distance but well does not change anything to the algorithm + * - G m m' / R ? + * + * What should be m ? m' ? Let's G = 1. + * + **/ + template + static inline double gravity(const Coord_t& d1, const Coord_t& d2) + { + double dist = distance_l2(d1, d2); + + // makes high level ( smallest ) cells to be exchanged more easily. 10 here is max + // level, hardcoded for tests. + // double m = 1./ static_cast( 1 << ( 10 - d1.level ) ) , m_p = 1.; + + // makes bigger cells to be exchanged more easily + // double m = 1./ static_cast( 1 << d1.level ) , m_p = 1.; + constexpr double G = 1.; + double m = 1., m_p = 1.; + double f = -G * m * m_p / (dist * dist); + + return f; + } + + // we are using Cell_t to allow ponderation using level; + template + inline constexpr double getDistance(const Coord_t& cc, const Coord_t& d) + { + static_assert(dim == 2 || dim == 3); + if constexpr (dist == Distance_t::L1) + { + return distance_l1(cc, d); + } + else if constexpr (dist == Distance_t::L2) + { + return distance_l2(cc, d); + } + else if constexpr (dist == Distance_t::LINF) + { + return distance_inf(cc, d); + } + else if constexpr (dist == Distance_t::GRAVITY) + { + return gravity(cc, d); + } + } + + template + bool cellExists( const Mesh_t & mesh, Id_t mesh_id, std::size_t level, const Coord_t & lo ){ + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = typename Mesh_t::ca_type; + + CellList_t tmp; + if constexpr( Mesh_t::dim == 2 ) { tmp[ level ][ { lo( 1 ) } ].add_point( lo( 0 ) ); } + if constexpr( Mesh_t::dim == 3 ) { tmp[ level ][ { lo( 1 ), lo( 2 ) } ].add_point( lo( 0 ) ); } + + CellArray_t tmp_ = { tmp, false }; + auto set = intersection( mesh[ mesh_id ][level], tmp_[level]); + + size_t ninterval = 0; + set( [&]([[maybe_unused]]const auto& i, [[maybe_unused]]const auto& index) { + ninterval ++ ; + }); + + return ninterval > 0 ? true : false ; + } + + template + class LoadBalancer + { + private: + + public: + std::ofstream logs; + int nloadbalancing; + + template + void update_field(Mesh_t& new_mesh, Field_t& field) + { + using mesh_id_t = typename Mesh_t::mesh_id_t; + using value_t = typename Field_t::value_type; + boost::mpi::communicator world; + + logs << fmt::format("\n# [LoadBalancer]::update_field rank # {} -> '{}' ", world.rank(), field.name() ) << std::endl; + + Field_t new_field("new_f", new_mesh); + new_field.fill(0); + + auto & old_mesh = field.mesh(); + + // auto min_level = boost::mpi::all_reduce(world, mesh[mesh_id_t::cells].min_level(), boost::mpi::minimum()); + // auto max_level = boost::mpi::all_reduce(world, mesh[mesh_id_t::cells].max_level(), boost::mpi::maximum()); + + auto min_level = old_mesh.min_level(); + auto max_level = old_mesh.max_level(); + + // copy data of intervals that are didn't move + for (std::size_t level = min_level; level <= max_level; ++level) + { + auto intersect_old_new = intersection(old_mesh[mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]); + intersect_old_new.apply_op( samurai::copy( new_field, field ) ); + } + + std::vector req; + std::vector> to_send( static_cast( world.size() ) ); + + // FIXME: this is overkill and will not scale + std::vector all_new_meshes, all_old_meshes; + boost::mpi::all_gather( world, new_mesh, all_new_meshes ); + boost::mpi::all_gather( world, field.mesh(), all_old_meshes ); + + // build payload of field that has been sent to neighbour, so compare old mesh with new neighbour mesh + // for (auto& neighbour : new_mesh.mpi_neighbourhood()) + for( size_t ni=0; ni( ni ) == world.rank() ) continue; + + // auto & neighbour_new_mesh = neighbour.mesh; + auto & neighbour_new_mesh = all_new_meshes[ ni ]; + + for (std::size_t level = min_level; level <= max_level; ++level) + { + if (!old_mesh[mesh_id_t::cells][level].empty() && !neighbour_new_mesh[mesh_id_t::cells][level].empty()) + { + auto intersect_old_mesh_new_neigh = intersection( old_mesh[mesh_id_t::cells][level], neighbour_new_mesh[mesh_id_t::cells][level] ); + intersect_old_mesh_new_neigh( + [&](const auto & interval, const auto & index) + { + std::copy(field(level, interval, index).begin(), field(level, interval, index).end(), std::back_inserter(to_send[ni])); + }); + } + } + + if (to_send[ni].size() != 0) + { + // neighbour_rank = neighbour.rank; + auto neighbour_rank = static_cast( ni ); + req.push_back( world.isend( neighbour_rank, neighbour_rank, to_send[ ni ] ) ); + + logs << fmt::format("\t> [LoadBalancer]::update_field send data to rank # {}", neighbour_rank ) << std::endl; + } + } + + logs << fmt::format("\t> [LoadBalancer]::update_field number of isend request: {}", req.size() ) << std::endl; + + // build payload of field that I need to receive from neighbour, so compare NEW mesh with OLD neighbour mesh + for (size_t ni=0; ni( ni ) == world.rank() ) continue; + + bool isintersect = false; + for (std::size_t level = min_level; level <= max_level; ++level) + { + if (!new_mesh[mesh_id_t::cells][level].empty() && !all_old_meshes[ ni ][mesh_id_t::cells][level].empty()) + { + std::vector to_recv; + + auto in_interface = intersection( all_old_meshes[ ni ][mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]); + + in_interface( + [&]( [[maybe_unused]]const auto& i, [[maybe_unused]]const auto& index) + { + isintersect = true; + }); + + if (isintersect) + { + break; + } + } + } + + if (isintersect) + { + std::ptrdiff_t count = 0; + std::vector to_recv; + world.recv( static_cast( ni ), world.rank(), to_recv); + + for (std::size_t level = min_level; level <= max_level; ++level) + { + if (!new_mesh[mesh_id_t::cells][level].empty() && !all_old_meshes[ ni ][mesh_id_t::cells][level].empty()) + { + auto in_interface = intersection(all_old_meshes[ ni ][mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]); + + in_interface( + [&](const auto& i, const auto& index) + { + std::copy(to_recv.begin() + count, + to_recv.begin() + count + static_cast(i.size()*field.size), + new_field(level, i, index).begin()); + count += static_cast(i.size()*field.size); + + // std::cerr << fmt::format("Process {}, recv interval {}", world.rank(), i) << std::endl; + }); + } + } + } + } + + if (!req.empty()) + { + mpi::wait_all(req.begin(), req.end()); + } + + std::swap(field.array(), new_field.array()); + } + + template + void update_fields(Mesh_t& new_mesh, Field_t& field, Fields_t&... kw) + + { + update_field(new_mesh, field); + update_fields(new_mesh, kw...); + } + + template + void update_fields([[maybe_unused]]Mesh_t& new_mesh) + { + } + + public: + + LoadBalancer() { + boost::mpi::communicator world; + logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app ); + nloadbalancing = 0; + } + + ~LoadBalancer() { + logs.close(); + } + + /** + * This function reorder cells across MPI processes based on a + * Space Filling Curve. This is mandatory for load balancing using SFC. + */ + template + void reordering( Mesh_t & mesh, Field_t& field, Fields&... kw ) + { + // new reordered mesh on current process + MPI exchange with others + // auto new_mesh = static_cast(this)->reordering_impl( mesh ); + logs << "\t> Computing reordering flags ... " << std::endl; + auto flags = static_cast(this)->reordering_impl( mesh ); + + logs << "\t> Update mesh based on flags ... " << std::endl; + auto new_mesh = update_mesh( mesh, flags ); + + // update each physical field on the new reordered mesh + SAMURAI_TRACE("[Reordering::load_balance]::Updating fields ... "); + logs << "\t> Update fields based on flags ... " << std::endl; + update_fields( new_mesh, field, kw... ); + + // swap mesh reference. + // FIX: this is not clean + SAMURAI_TRACE("[Reordering::load_balance]::Swapping meshes ... "); + field.mesh().swap( new_mesh ); + + // discover neighbours: add new neighbours if a new interface appears or remove old neighbours + discover_neighbour( field.mesh() ); + discover_neighbour( field.mesh() ); + + } + + template + Mesh_t update_mesh( Mesh_t & mesh, const Field_t & flags ) { + + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = typename Mesh_t::ca_type; + + boost::mpi::communicator world; + + CellList_t new_cl; + std::vector payload( static_cast( world.size() ) ); + std::vector payload_size( static_cast( world.size()), 0 ); + + std::map comm; + + // build cell list for the current process && cells lists of cells for other processes + samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&]( const auto & cell ){ + + if( flags[ cell ] == world.rank() ){ + if constexpr ( Mesh_t::dim == 1 ){ new_cl[ cell.level ][ {} ].add_point( cell.indices[ 0 ] ); } + if constexpr ( Mesh_t::dim == 2 ){ new_cl[ cell.level ][ { cell.indices[ 1 ] } ].add_point( cell.indices[ 0 ] ); } + if constexpr ( Mesh_t::dim == 3 ){ new_cl[ cell.level ][ { cell.indices[ 1 ], cell.indices[ 2 ] } ].add_point( cell.indices[ 0 ] ); } + }else{ + assert( static_cast( flags[ cell ] ) < payload.size() ); + + if( comm.find( flags[ cell ] ) == comm.end() ) { + comm[ flags[ cell ] ] = true; + } + + if constexpr ( Mesh_t::dim == 1 ){ payload[ static_cast( flags[ cell ] ) ][ cell.level ][ {} ].add_point( cell.indices[ 0 ] ); } + if constexpr ( Mesh_t::dim == 2 ){ payload[ static_cast( flags[ cell ] ) ][ cell.level ][ { cell.indices[ 1 ] } ].add_point( cell.indices[ 0 ] ); } + if constexpr ( Mesh_t::dim == 3 ){ payload[ static_cast( flags[ cell ] ) ][ cell.level ][ { cell.indices[ 1 ], cell.indices[ 2 ] } ].add_point( cell.indices[ 0 ] ); } + + payload_size[ static_cast( flags[ cell ] ) ] ++; + } + + }); + + logs << "\t\t>[Load_balancer::update_mesh] Comm required with processes : ["; + for( const auto & it : comm ) + logs << it.first << fmt::format(" ({} cells),", payload_size[ static_cast( it.first ) ]); + logs << "]" << std::endl; + + std::vector req_send( static_cast( world.size() ), 0 ), req_recv( static_cast( world.size() ), 0 ); + + // Required to know communication pattern + for( int iproc=0; iproc( iproc ) ].empty() ) reqExchg = 0; + + req_send[ static_cast( iproc ) ] = reqExchg; + + world.send( iproc, 42, reqExchg ); + + } + + for( int iproc=0; iproc( iproc ) ] ); + } + + for( int iproc=0; iproc( iproc ) ], req_recv[ static_cast( iproc ) ] ) << std::endl;; + } + + std::vector req; + + // actual data echange between processes that need to exchange data + for(int iproc=0; iproc( iproc ) ] == 1 ){ + CellArray_t to_send = { payload[ static_cast( iproc ) ], false }; + + req.push_back( world.isend( iproc, 17, to_send ) ); + + logs << fmt::format("\t> Sending to # {}", iproc ) << std::endl; + } + } + + for(int iproc=0; iproc( iproc ) ] == 1 ) { + CellArray_t to_rcv; + logs << fmt::format("\t> Recving from # {}", iproc ) << std::endl; + world.recv( iproc, 17, to_rcv ); + + samurai::for_each_interval(to_rcv, [&](std::size_t level, const auto & interval, const auto & index ){ + new_cl[ level ][ index ].add_interval( interval ); + }); + } + } + + boost::mpi::wait_all( req.begin(), req.end() ); + + Mesh_t new_mesh( new_cl, mesh ); + + return new_mesh; + } + + template + void load_balance(Mesh_t & mesh, Field_t& field, Fields&... kw) + { + + std::string lbn = static_cast(this)->getName(); + + logs << fmt::format("\n###################################################" ) << std::endl; + logs << fmt::format("> Load balancing ({}) mesh @ iteration {} ", lbn, nloadbalancing ) << std::endl; + logs << fmt::format("###################################################\n") << std::endl; + + auto p = lbn.find( "SFC" ); + if( nloadbalancing == 0 && p != std::string::npos ) { + reordering( mesh , field, kw... ); + } + + // specific load balancing strategy + // auto new_mesh = static_cast(this)->load_balance_impl( field.mesh() ); + + auto flags = static_cast(this)->load_balance_impl( field.mesh() ); + + auto new_mesh = update_mesh( mesh, flags ); + + // update each physical field on the new load balanced mesh + SAMURAI_TRACE("[LoadBalancer::load_balance]::Updating fields ... "); + update_fields( new_mesh, field, kw... ); + + // swap mesh reference to new load balanced mesh. FIX: this is not clean + SAMURAI_TRACE("[LoadBalancer::load_balance]::Swapping meshes ... "); + field.mesh().swap( new_mesh ); + + // discover neighbours: add new neighbours if a new interface appears or remove old neighbours + // FIX: add boolean return to condition the need of another call, might save some MPI comm. + SAMURAI_TRACE("[LoadBalancer::load_balance]::discover neighbours ... "); + discover_neighbour( field.mesh() ); + discover_neighbour( field.mesh() ); + + nloadbalancing += 1; + } + + /* + * Call balancer function that evaluate if load balancing is required or not based on balncer internal strategy + * + */ + template + bool require_balance( Mesh_t & mesh ) { + return static_cast(this)->require_balance_impl( mesh ); + } + + /** + * This function MUST be used for debug or analysis purposes of load balancing strategy, + * it involves a lots of MPI communications. + * + * Try to evaluate / compute a load balancing score. We expect from a good load + * balancing strategy to: + * - optimize the number of neighbours (reduce comm.) + * - optimize the number of ghosts cells (reduce comm.) + * - load balance charge between processes + * - optimize the size of interval (samurai specific, expect better perf, better simd) + */ + template + void evaluate_balancing(Mesh& mesh) const + { + boost::mpi::communicator world; + + if (world.rank() == 0) + { + SAMURAI_TRACE("[LoadBalancer::evaluate_balancing]::Entering function ... "); + } + + std::vector load_cells, load_interval; + { + int my_load_i = static_cast(cmptLoad(mesh)); + boost::mpi::all_gather(world, my_load_i, load_interval); + + int my_load_c = static_cast(cmptLoad(mesh)); + boost::mpi::all_gather(world, my_load_c, load_cells); + } + + // if (world.rank() == 0) + // { + // std::cerr << "\t> LoadBalancer statistics : " << std::endl; + + // std::vector::iterator min_load = std::min_element(load_cells.begin(), load_cells.end()); + // auto rank = std::distance(load_cells.begin(), min_load); + // std::cerr << "\t\t> Min load {" << *min_load << ", cells } @ rank # " << rank << std::endl; + + // std::vector::iterator max_load = std::max_element(load_cells.begin(), load_cells.end()); + // rank = std::distance(load_cells.begin(), max_load); + // std::cerr << "\t\t> Max load {" << *max_load << ", cells } @ rank # " << rank << std::endl; + // } + + // std::string _stats = fmt::format("statistics_process_{}", world.rank()); + // samurai::Statistics s(_stats); + + // s("stats", mesh); + + // s( "statistics" , mesh ); + + // no need to implement this in derived class, could be general. + // + // static_cast(this)->evaluate_impl( mesh, kw... ); + } + }; + + /** + * Precomputed direction to face-to-face element for both 3D and 2D + */ + template + constexpr auto getDirectionFace() + { + using base_stencil = xt::xtensor_fixed>; + + constexpr size_t size_ = 2 * dim; + xt::xtensor_fixed> stencils; + std::size_t nstencils = size_; + for (std::size_t ist = 0; ist < nstencils; ++ist) + { + stencils[ist].fill(0); + stencils[ist][ist / 2] = 1 - (ist % 2) * 2; + } + // if constexpr( dim == 2 ){ + // return xt::xtensor_fixed> {{{1, 0}, {-1, 0}, {0, 1}, {0, -1}}}; + // } + + return stencils; + } + + /** + * Precomputed direction to diagonal element for both 3D and 2D cases. + */ + template + constexpr auto getDirectionDiag() + { + using base_stencil = xt::xtensor_fixed>; + + static_assert(dim == 2 || dim == 3); + + if constexpr (dim == 2) + { + return xt::xtensor_fixed>{ + {{1, 1}, {1, -1}, {-1, 1}, {-1, -1}} + }; + } + + if constexpr (dim == 3) + { + return xt::xtensor_fixed>{ + {{1, 1, -1}, {1, -1, -1}, {-1, 1, -1}, {-1, -1, -1}, {1, 0, -1}, {-1, 0, -1}, {0, 1, -1}, + {0, -1, -1}, {1, 1, 0}, {1, -1, 0}, {-1, 1, 0}, {-1, -1, 0}, {1, 1, 1}, {1, -1, 1}, + {-1, 1, 1}, {-1, -1, 1}, {1, 0, 1}, {-1, 0, 1}, {0, 1, 1}, {0, -1, 1}} + }; + } + } + + /** + * Precompute direction to element in all direction face + diagonals: 26 in 3D, 8 in 2D + */ + template + constexpr auto getDirectionFaceAndDiag() + { + using base_stencil = xt::xtensor_fixed>; + + static_assert(dim == 2 || dim == 3); + + if constexpr (dim == 2) + { + return xt::xtensor_fixed>{ + {{-1, 0}, {1, 0}, {1, 1}, {1, -1}, {0, -1}, {0, 1}, {-1, 1}, {-1, -1}} + }; + } + + if constexpr (dim == 3) + { + return xt::xtensor_fixed>{ + {{1, 0, 0}, {-1, 0, 0}, {0, 1, 0}, {0, -1, 0}, {0, 0, 1}, {0, 0, -1}, {1, 1, -1}, {1, -1, -1}, {-1, 1, -1}, + {-1, -1, -1}, {1, 0, -1}, {-1, 0, -1}, {0, 1, -1}, {0, -1, -1}, {1, 1, 0}, {1, -1, 0}, {-1, 1, 0}, {-1, -1, 0}, + {1, 1, 1}, {1, -1, 1}, {-1, 1, 1}, {-1, -1, 1}, {1, 0, 1}, {-1, 0, 1}, {0, 1, 1}, {0, -1, 1}} + }; + } + } + + template + inline auto getDirection() + { + static_assert(dim == 2 || dim == 3); + if constexpr (dir == Direction_t::FACE) + { + return getDirectionFace(); + } + else if constexpr (dir == Direction_t::DIAG) + { + return getDirectionDiag(); + } + else if constexpr (dir == Direction_t::FACE_AND_DIAG) + { + return getDirectionFaceAndDiag(); + } + } + + /** + * Compute the barycenter of the mesh given in parameter. + * + * Feat: adjust wght which is hardcoded to 1. for now. + * by passing in parameters an array for example to modulate + * weight according to level + */ + template + xt::xtensor_fixed> _cmpCellBarycenter(Mesh_t& mesh) + { + using Coord_t = xt::xtensor_fixed>; + + Coord_t bary; + bary.fill(0.); + + double wght_tot = 0.; + samurai::for_each_cell(mesh, + [&](auto& cell) + { + double wght = 1.; + + if constexpr( w == Weight::OnSmall ){ + wght = 1. / static_cast( 1 << (mesh.max_level() - cell.level) ); + } + + if constexpr( w == Weight::OnLarge ){ + wght = 1. / static_cast( 1 << cell.level ); + } + + auto cc = cell.center(); + + for (size_t idim = 0; idim < dim; ++idim) + { + bary(idim) += cc(idim) * wght; + } + + wght_tot += wght; + }); + + wght_tot = std::max(wght_tot, 1e-12); + for (size_t idim = 0; idim < dim; ++idim) + { + bary(idim) /= wght_tot; + } + + return bary; + } + + + + /** + * + * Params: + * leveldiff : max level difference. For 2:1 balance, leveldiff = 1 + */ + template + static auto cmptInterface(const Mesh_t& mesh, const Mesh_t& omesh) + { + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = typename Mesh_t::ca_type; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + CellList_t interface; + + // operation are on leaves only + auto& currentMesh = mesh[mesh_id_t::cells]; + auto& otherMesh = omesh[mesh_id_t::cells]; + + // direction for translation to cmpt interface + auto dirs = getDirection(); + + for (size_t ist = 0; ist < dirs.size(); ++ist) + { + const auto& stencil = dirs[ist]; + + size_t minlevel = otherMesh.min_level(); + size_t maxlevel = otherMesh.max_level(); + + for (size_t level = minlevel; level <= maxlevel; ++level) + { + // for each level we need to check level -1 / 0 / +1 + std::size_t minlevel_check = static_cast( + std::max(static_cast(currentMesh.min_level()), static_cast(level) - static_cast(leveldiff))); + std::size_t maxlevel_check = std::min(currentMesh.max_level(), level + leveldiff); + + for (size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel) + { + // translate neighbour from dir (hopefully to current) all direction are tested + auto set = translate(otherMesh[level], stencil); + auto intersect = intersection(set, currentMesh[projlevel]).on(projlevel); + + size_t nbInter_ = 0, nbCells = 0; + intersect( + [&](auto& interval, [[maybe_unused]] auto& index) + { + nbInter_ += 1; + nbCells += interval.size(); + }); + + // we get more interval / cells, than wanted because neighbour has bigger cells + // 2:1 balance required here if not, need another loop... + if (nbInter_ > 0 && projlevel > level) + { + static_assert(leveldiff == 1); // for now at least + auto set_ = translate(currentMesh[projlevel], stencil); + auto diff_ = difference(intersect, set_); + + diff_( + [&](auto& interval, auto& index) + { + interface[projlevel][index].add_interval(interval); + }); + } + else + { + if (nbInter_ > 0) + { + intersect( + [&](auto& interval, auto& index) + { + interface[projlevel][index].add_interval(interval); + }); + } + } + } + } + } + + CellArray_t interface_ = {interface, false}; + + return interface_; + } + + /** + * + */ + template + static auto cmptInterfaceUniform(Mesh_t& mesh, Mesh_t& omesh, const Dir_t & dir ) + { + + // dim == 3 is not supported for the moment. + assert( dim == 2 ); + + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = typename Mesh_t::ca_type; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + // operation are on leaves only + auto& currentMesh = mesh[mesh_id_t::cells]; + auto& otherMesh = omesh[mesh_id_t::cells]; + + size_t minlevel = otherMesh.min_level(); + size_t maxlevel = otherMesh.max_level(); + + size_t minLevelAtInterface = 99; + + struct MinMax { + int min_x = std::numeric_limits::max(); + int max_x = std::numeric_limits::min(); + int min_y = std::numeric_limits::max(); + int max_y = std::numeric_limits::min(); + }; + + boost::mpi::communicator world; + std::ofstream logs; + logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app ); + + logs << fmt::format("\t\t\t> Allocating vector of size : max({}, {})+2 : {}", mesh.max_level(), omesh.max_level(), + std::max( mesh.max_level(), omesh.max_level() ) + 2 ) << std::endl; + + size_t msize = std::max( mesh.max_level(), omesh.max_level() ) + 2; + std::vector< MinMax > mm ( msize ); + + for (size_t level = minlevel; level <= maxlevel; ++level) + { + // for each level we need to check level -1 / 0 / +1 + std::size_t minlevel_check = static_cast( + std::max(static_cast(currentMesh.min_level()), static_cast(level - 1) )); + std::size_t maxlevel_check = std::min(currentMesh.max_level(), level + static_cast( 1 ) ); + + for (size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel) + { + // translate neighbour from dir (hopefully to current) + auto set = translate( otherMesh[level], dir ); + auto intersect = intersection(set, currentMesh[projlevel]).on(projlevel); + + size_t nbInter_ = 0; + intersect( [&]( const auto & interval, const auto & index ) { + nbInter_ += 1; + + mm[ projlevel ].min_x = std::min( interval.start, mm[ projlevel ].min_x ); + mm[ projlevel ].max_x = std::max( interval.end, mm[ projlevel ].max_x ); + mm[ projlevel ].min_y = std::min( index(0), mm[ projlevel ].min_y ); + mm[ projlevel ].max_y = std::max( index(0), mm[ projlevel ].max_y ); + }); + + if (nbInter_ > 0){ + minLevelAtInterface = std::min( projlevel, minLevelAtInterface ); + } + } + + } + + if( minLevelAtInterface == 99 ) { + logs << "\t\t\t> No interface found ... " << std::endl; + + CellList_t tmp; + CellArray_t ca_tmp = { tmp, false }; + return ca_tmp; + } + + logs << fmt::format("\t\t\t> minLevelAtInterface : {}", minLevelAtInterface ) << std::endl; + logs << fmt::format( "\t\t\t> Interface min, max : x ({},{}); min, max : y ({},{}) @ level : {}", mm[ minLevelAtInterface ].min_x, + mm[ minLevelAtInterface ].max_x, mm[ minLevelAtInterface ].min_y, mm[ minLevelAtInterface ].max_y, minLevelAtInterface ) << std::endl; + + struct MinMax global; + global.min_x = mm[ minLevelAtInterface ].min_x; + global.max_x = mm[ minLevelAtInterface ].max_x; + global.min_y = mm[ minLevelAtInterface ].min_y; + global.max_y = mm[ minLevelAtInterface ].max_y; + + for( size_t level=minLevelAtInterface+1; level At level {}, diff {}", level, diff_level ) << std::endl; + // logs << fmt::format( "\t\t\t> Origin min, max : x ({},{}); min, max : y ({},{})", mm[ level ].min_x, + // mm[ level ].max_x, mm[ level ].min_y, mm[ level ].max_y ) << std::endl; + // logs << fmt::format( "\t\t\t> Eq minlevel, min, max : x ({},{}); min, max : y ({},{}) ", mm[ level ].min_x >> diff_level, + // mm[ level ].max_x >> diff_level, mm[ level ].min_y >> diff_level, mm[ level ].max_y >> diff_level ) << std::endl; + + global.min_x = std::min( global.min_x, mm[ level ].min_x >> diff_level ); + global.max_x = std::max( global.max_x, mm[ level ].max_x >> diff_level ); + global.min_y = std::min( global.min_y, mm[ level ].min_y >> diff_level ); + global.max_y = std::max( global.max_y, mm[ level ].max_y >> diff_level ); + } + + CellList_t tmp; + + // +y, -y : horizontal propagation + if( std::abs( dir[0] ) == 0 && std::abs( dir[1] ) == 1 ){ + logs << "\t> Horizontal propagation ! " << std::endl; + if( dir[ 1 ] > 0 ){ + tmp[ minLevelAtInterface ][ { global.min_y } ].add_interval( { global.min_x, global.max_x } ); + } else { + tmp[ minLevelAtInterface ][ { global.max_y } ].add_interval( { global.min_x, global.max_x } ); + } + } + + // +x, -x : vertical propagation + if( std::abs( dir[0] ) == 1 && std::abs( dir[1] ) == 0 ){ + logs << "\t> Vertical propagation ! " << std::endl; + for(int y=global.min_y; y<=global.max_y; ++y ){ + tmp[ minLevelAtInterface ][ { y } ].add_interval( { global.min_x, global.max_x } ); + } + } + + + CellArray_t ca_tmp = { tmp, false }; + + logs << "Interface for propagation : " << std::endl; + logs << ca_tmp << std::endl; + logs << std::endl; + + return ca_tmp; + } + + /** + * Compute cells at the interface between geometricaly adjacent + * domains. It relies on neighbourhood mpi_subdomain_t data structure + * computed in Mesh_t. + * + * It returns a vector containing the number of cells (nc) and number of + * intervals (ni) for each neighbour (0 to n), in that order. + * + * Example: nc_0, ni_0, nc_1, ni_1, ... + * + */ + // template + // void _computeCartesianInterface( Mesh_t & mesh, Field_t & field ){ + template + static auto _computeCartesianInterface(Mesh_t& mesh) + { + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = typename Mesh_t::ca_type; + using mesh_id_t = typename Mesh_t::mesh_id_t; + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + + // give access to geometricaly neighbour process rank and mesh + std::vector& neighbourhood = mesh.mpi_neighbourhood(); + std::size_t n_neighbours = neighbourhood.size(); + + std::vector interface(n_neighbours); + + // operation are on leaves only + auto currentMesh = mesh[mesh_id_t::cells]; + + // looks like using only FACE is better than using DIAG or FACE_AND_DIAG + auto dirs = getDirection(); + + for (std::size_t nbi = 0; nbi < n_neighbours; ++nbi) + { + auto neighbour_mesh = neighbourhood[nbi].mesh[mesh_id_t::cells]; + + for (size_t ist = 0; ist < dirs.size(); ++ist) + { + const auto& stencil = dirs[ist]; + + size_t minlevel = neighbour_mesh.min_level(); + size_t maxlevel = neighbour_mesh.max_level(); + + for (size_t level = minlevel; level <= maxlevel; ++level) + { + // for each level we need to check level -1 / 0 / +1 + std::size_t minlevel_check = static_cast( + std::max(static_cast(currentMesh.min_level()), static_cast(level) - 1)); + std::size_t maxlevel_check = std::min(currentMesh.max_level(), level + static_cast(1)); + + for (std::size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel) + { + // translate neighbour from dir (hopefully to current) all direction are tested + auto set = translate(neighbour_mesh[level], stencil); + auto intersect = intersection(set, currentMesh[projlevel]).on(projlevel); + + size_t nbInter_ = 0, nbCells_ = 0; + intersect( + [&](auto& interval, [[maybe_unused]] auto& index) + { + nbInter_ += 1; + nbCells_ += interval.size(); + }); + + // we get more interval / cells, than wanted because neighbour has bigger cells + if (nbInter_ > 0 && projlevel > level) + { + auto set_ = translate(currentMesh[projlevel], stencil); + auto diff_ = difference(intersect, set_); + + nbInter_ = 0; + nbCells_ = 0; + diff_( + [&](auto& interval, auto& index) + { + nbInter_ += 1; + nbCells_ += interval.size(); + // field( projlevel, i, index ) = world.rank() * 10; + interface[nbi][projlevel][index].add_interval(interval); + }); + } + else + { + if (nbInter_ > 0) + { + intersect( + [&](auto& interval, auto& index) + { + interface[nbi][projlevel][index].add_interval(interval); + }); + } + } + } + } + } + } + + std::vector interface_(n_neighbours); + for (std::size_t nbi = 0; nbi < n_neighbours; ++nbi) + { + interface_[nbi] = {interface[nbi], true}; + } + + return interface_; + } + + /** + * Compute fluxes of cells between MPI processes. In -fake- MPI environment. To + * use it in true MPI juste remove the loop over "irank", and replace irank by myrank; + * + */ + void compute_load_balancing_fluxes(std::vector& all) + { + for (size_t irank = 0; irank < all.size(); ++irank) + { + // number of cells + // supposing each cell has a cost of 1. ( no level dependency ) + int32_t load = all[irank]._load; + + std::size_t n_neighbours = all[irank].neighbour.size(); + + { + std::cerr << "[compute_load_balancing_fluxes] Process # " << irank << " load : " << load << std::endl; + std::cerr << "[compute_load_balancing_fluxes] Process # " << irank << " nneighbours : " << n_neighbours << std::endl; + std::cerr << "[compute_load_balancing_fluxes] Process # " << irank << " neighbours : "; + for (size_t in = 0; in < all[irank].neighbour.size(); ++in) + { + std::cerr << all[irank].neighbour[in] << ", "; + } + std::cerr << std::endl; + } + + // load of each process (all processes not only neighbour) + std::vector loads; + + // data "load" to transfer to neighbour processes + all[irank].fluxes.resize(n_neighbours); + std::fill(all[irank].fluxes.begin(), all[irank].fluxes.end(), 0); + + const std::size_t n_iterations = 1; + + for (std::size_t k = 0; k < n_iterations; ++k) + { + // numbers of neighboors processes for each neighbour process + std::vector nb_neighbours; + + if (irank == 0) + { + std::cerr << "[compute_load_balancing_fluxes] Fluxes iteration # " << k << std::endl; + } + + // // get info from processes + // mpi::all_gather(world, load, loads); + // mpi::all_gather(world, m_mpi_neighbourhood.size(), nb_neighbours); + + // load of current process + int32_t load_np1 = load; + + // compute updated load for current process based on its neighbourhood + for (std::size_t j_rank = 0; j_rank < n_neighbours; ++j_rank) + { + auto neighbour_rank = static_cast(all[irank].neighbour[j_rank]); + auto neighbour_load = all[irank].load[j_rank]; + auto diff_load = neighbour_load - load; + + std::size_t nb_neighbours_neighbour = all[neighbour_rank].neighbour.size(); + + double weight = 1. / static_cast(std::max(n_neighbours, nb_neighbours_neighbour) + 1); + + int32_t transfertLoad = static_cast(std::lround(weight * static_cast(diff_load))); + + all[irank].fluxes[j_rank] += transfertLoad; + + load_np1 += transfertLoad; + } + + // do check on load & fluxes ? + + { + std::cerr << "fluxes : "; + for (size_t in = 0; in < n_neighbours; ++in) + { + std::cerr << all[irank].fluxes[in] << ", "; + } + std::cerr << std::endl; + } + + // load_transfer( load_fluxes ); + + load = load_np1; + } + } + } + + /** + * Check if there is an intersection between two meshes. We move one mesh into the direction + * of the other based on "dir" stencils (template parameter). Here, we rely on the 2:1 balance. + * By default, stencils are "1" based, which means that we move the mesh by one unit element. This + * could be changed if necessary by multiplying the stencils by an integer value. + */ + template + bool intersectionExists(Mesh_t& meshA, Mesh_t& meshB) + { + using mesh_id_t = typename Mesh_t::mesh_id_t; + + constexpr size_t dim = Mesh_t::dim; + + // operation are on leaves cells only + auto & meshA_ = meshA[mesh_id_t::cells]; + auto & meshB_ = meshB[mesh_id_t::cells]; + + // get stencils for direction + auto dirs = getDirection(); + + for (size_t ist = 0; ist < dirs.size(); ++ist) + { + const auto& stencil = dirs[ist]; + + size_t minlevel = meshB_.min_level(); + size_t maxlevel = meshB_.max_level(); + + for (size_t level = minlevel; level <= maxlevel; ++level) + { + // for each level we need to check level -1 / 0 / +1 + std::size_t minlevel_check = static_cast( + std::max(static_cast(meshA_.min_level()), static_cast(level) - 1)); + std::size_t maxlevel_check = std::min(meshA_.max_level(), level + static_cast(1)); + + for (std::size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel) + { + // translate meshB_ in "dir" direction + auto set = translate(meshB_[level], stencil); + auto intersect = intersection(set, meshA_[projlevel]).on(projlevel); + + size_t nbInter_ = 0; + intersect( + [&]([[maybe_unused]] auto& interval, [[maybe_unused]] auto& index) + { + nbInter_ += 1; + }); + + if (nbInter_ > 0) + { + return true; + } + } + } + } + + return false; + } + + /** + * Discover new neighbour connection that might arise during load balancing + */ + template + void discover_neighbour(Mesh_t& mesh) + { + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + + boost::mpi::communicator world; + + // DEBUG + std::ofstream logs; + logs.open("log_" + std::to_string(world.rank()) + ".dat", std::ofstream::app); + logs << "\n# [LoadBalancer] discover_neighbour" << std::endl; + + // give access to geometricaly neighbour process rank and mesh + std::vector& neighbourhood = mesh.mpi_neighbourhood(); + + // bool requireGeneralUpdate = false; + + std::vector keepNeighbour(neighbourhood.size(), true); + + std::vector> neighbourConnection(neighbourhood.size()); + + // for each neighbour we check if there is an intersection with another one. + for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi) + { + // check current - neighbour connection + keepNeighbour[nbi] = intersectionExists(mesh, neighbourhood[nbi].mesh); + + // require update if a connection is lost + // requireGeneralUpdate = requireGeneralUpdate || ( ! keepNeighbour[ nbi ] ); + + if (!keepNeighbour[nbi]) + { + logs << fmt::format("\t> Loosing neighbour connection with {}", neighbourhood[nbi].rank) << std::endl; + } + + // check neighbour - neighbour connection + for (size_t nbj = nbi + 1; nbj < neighbourhood.size(); ++nbj) + { + logs << fmt::format("\t> Checking neighbourhood connection {} <-> {}", neighbourhood[nbi].rank, neighbourhood[nbj].rank) + << std::endl; + bool connected = intersectionExists(neighbourhood[nbi].mesh, neighbourhood[nbj].mesh); + + if (connected) + { + neighbourConnection[nbi].emplace_back(neighbourhood[nbj].rank); + neighbourConnection[nbj].emplace_back(neighbourhood[nbi].rank); + } + } + } + + // communicate to neighbours the list of connection they have + for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi) + { + // debug + logs << "\t> Sending computed neighbour: {"; + for (const auto& i : neighbourConnection[nbi]) + { + logs << i << ", "; + } + logs << "} to process " << neighbourhood[nbi].rank << std::endl; + // end debug + + world.send(neighbourhood[nbi].rank, 28, neighbourConnection[nbi]); + } + + // map of current MPI neighbours processes rank + std::map _tmp; // key = rank, value = required + for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi) + { + if (keepNeighbour[nbi]) + { + _tmp.emplace(std::make_pair(neighbourhood[nbi].rank, true)); + } + } + + logs << "\t> Receiving computed neighbour connection list from neighbour processes " << std::endl; + for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi) + { + std::vector _rcv_neighbour_list; + world.recv(neighbourhood[nbi].rank, 28, _rcv_neighbour_list); + + for (size_t in = 0; in < _rcv_neighbour_list.size(); ++in) + { + if (_tmp.find(_rcv_neighbour_list[in]) == _tmp.end()) + { + logs << "\t\t> New neighbour detected : " << _rcv_neighbour_list[in] << std::endl; + + _tmp[_rcv_neighbour_list[in]] = true; + // requireGeneralUpdate = requireGeneralUpdate || true; + } + } + } + + world.barrier(); + + // update neighbours ranks + neighbourhood.clear(); + for (const auto& ni : _tmp) + { + neighbourhood.emplace_back(ni.first); + } + + // debug + logs << "\t> New neighbourhood : {"; + for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi) + { + logs << neighbourhood[nbi].rank << ", "; + } + logs << "}" << std::endl; + // end debug + + // gather neighbour mesh + mesh.update_mesh_neighbour(); + + } + +} // namespace samurai +#endif + + +/** + * This function perform a "fake" load-balancing by updating an integer scalar field containing the rank + * of the cell after the load balancing; This is done using MORTON SFC. + * + * This does not require to have the graph. + */ +// template +// void perform_load_balancing_SFC( Mesh & mesh, int ndomains, Field_t & fake_mpi_rank ) { + +// using Config = samurai::MRConfig; +// using Mesh_t = samurai::MRMesh; +// using cell_t = typename Mesh::cell_t; + +// SFC sfc; + +// // SFC key (used for implicit sorting through map mechanism) +// std::map sfc_map; + +// int sfc_max_level = mesh.max_level(); // for now but remember must <= 21 for Morton + +// SFC_key_t min=std::numeric_limits::max(), max=std::numeric_limits::min(); + +// samurai::for_each_cell( mesh, [&]( const auto & cell ){ + +// // ij coordinate of cell +// double dxmax = samurai::cell_length( sfc_max_level ); + +// auto tmp = cell.center() / dxmax; + +// xt::xtensor_fixed> ij = { static_cast( tmp( 0 ) ), +// static_cast( tmp( 1 ) ) }; + +// auto key = sfc.template getKey( ij ); +// // std::cerr << "\t> Coord (" << ij( 0 ) << ", " << ij( 1 ) << ") ----> " << key << std::endl; + +// sfc_map[ key ] = cell ; + +// }); + +// size_t nbcells_tot = mesh.nb_cells( Mesh::mesh_id_t::cells ); //load -balancing on leaves +// size_t ncellsPerProc = std::floor( nbcells_tot / ndomains ); + +// // std::cerr << "\n\t> Morton index [" << min << ", " << max << "]" << std::endl; +// std::cerr << "\t> Total number of cells : " << nbcells_tot << std::endl; +// std::cout << "\t> Perfect load-balancing (weight-cell = 1.) : " << static_cast( nbcells_tot / ndomains ) +// << " / MPI" << std::endl; + +// size_t cindex = 0; +// for(const auto & item : sfc_map ) { +// auto sfc_key = item.first; + +// int fake_rank = std::floor( cindex / ncellsPerProc ); +// if ( fake_rank >= ndomains ) fake_rank = ndomains - 1; + +// fake_mpi_rank[ item.second ] = fake_rank; +// cindex ++; +// } + +// } + +// enum Interval_CellPosition { FIRST, LAST }; + +// template +// void perform_load_balancing_SFC_Interval( Mesh & mesh, int ndomains, Field_t & fake_mpi_rank, const Interval_CellPosition &icp ) { + +// using inter_t = samurai::Interval; + +// struct Data { +// size_t level; +// inter_t interval; +// xt::xtensor_fixed> indices; +// }; + +// // SFC key (used for implicit sorting through map mechanism) +// std::map sfc_map; + +// SFC sfc; +// int sfc_max_level = mesh.max_level(); // for now but remember must <= 21 for Morton + +// SFC_key_t min=std::numeric_limits::max(), max=std::numeric_limits::min(); + +// size_t ninterval = 0; +// samurai::for_each_interval( mesh, [&]( std::size_t level, const auto& inter, const auto& index ){ + +// // get Logical coordinate or first cell +// xt::xtensor_fixed> icell; + +// icp == Interval_CellPosition::LAST ? icell( 0 ) = inter.end - 1 : icell( 0 ) = inter.start; + +// for(int idim=0; idim> ijk; + +// if constexpr ( dim == 2 ) ijk = { static_cast( icell( 0 ) ), +// static_cast( icell( 1 ) ) }; + +// if constexpr ( dim == 3 ) ijk = { static_cast( icell( 0 ) ), +// static_cast( icell( 1 ) ), +// static_cast( icell( 2 ) ) }; + +// sfc_map[ sfc.getKey( ijk ) ] = { level, inter, index }; + +// ninterval ++; +// }); + +// size_t ninterPerProc = std::floor( ninterval / ndomains ); + +// // std::cerr << "\n\t> Morton index [" << min << ", " << max << "]" << std::endl; +// std::cerr << "\t> Total number of interval : " << ninterval << std::endl; +// std::cout << "\t> Perfect load-balancing (weight-interval = 1.) : " << static_cast( ninterPerProc ) +// << " / MPI" << std::endl; + +// size_t cindex = 0; +// for(const auto & item : sfc_map ) { +// int fake_rank = std::floor( cindex / ninterPerProc ); +// if ( fake_rank >= ndomains ) fake_rank = ndomains - 1; + +// fake_mpi_rank( item.second.level, item.second.interval, item.second.indices ) = fake_rank; + +// cindex ++; +// } + +// } + +// /** +// * +// * Global contains the global mesh. Since this is a toy function, it contains the union of all mesh. +// * meshes contains the mesh of each MPI process ( or let say the mesh of neighbour processes ...) +// * +// */ +// template +// void perform_load_balancing_diffusion( AMesh_t & global, std::vector & meshes, int ndomains, +// const std::vector & all, Field_t & fake_mpi_rank ) { + +// using CellList_t = typename AMesh_t::cl_type; + +// struct Coord_t { xt::xtensor_fixed> coord; }; + +// std::vector barycenters ( ndomains ); + +// { // compute barycenter of current domains ( here, all domains since with simulate multiple MPI domains) + +// int maxlevel = 4; +// for( size_t m_=0; m_ < meshes.size(); ++m_ ) { + +// double wght_tot = 0.; +// samurai::for_each_cell( meshes[ m_ ], [&]( const auto & cell ) { + +// // [OPTIMIZATION] precompute weight as array +// double wght = 1. / ( 1 << ( maxlevel - cell.level ) ); + +// const auto cc = cell.center(); + +// barycenters[ m_ ].coord( 0 ) += cc( 0 ) * wght; +// barycenters[ m_ ].coord( 1 ) += cc( 1 ) * wght; +// if constexpr ( dim == 3 ) { barycenters[ m_ ].coord( 2 ) += cc( 2 ) * wght; } + +// wght_tot += wght; + +// }); + +// barycenters[ m_ ].coord( 0 ) /= wght_tot; +// barycenters[ m_ ].coord( 1 ) /= wght_tot; +// if constexpr ( dim == 3 ) barycenters[ m_ ].coord( 2 ) /= wght_tot; + +// std::cerr << "\t> Domain # " << m_ << ", bc : {" << barycenters[ m_ ].coord( 0 ) << ", " +// << barycenters[ m_ ].coord( 1 ) << "}" << std::endl; + +// } + +// } + +// std::vector new_meshes( ndomains ); +// std::vector exchanged( ndomains ); + +// for( std::size_t m_ = 0; m_ < meshes.size(); ++m_ ){ // over each domains + +// std::cerr << "\t> Working on domains # " << m_ << std::endl; + +// // auto dist = samurai::make_field( "rank", meshes[ m_ ] ); + +// // id des neighbours dans le tableau de la structure MPI_Load_Balance +// // attention différent du rank mpi ! +// std::vector id_send; + +// int n_neighbours = static_cast( all[ m_ ].neighbour.size() ); +// for(std::size_t nbi = 0; nbi Neighbour rank : " << nbi_rank << std::endl; +// std::cerr << "\t\t> Neighbour flux : " << all[ m_ ].fluxes[ nbi ] << std::endl; + +// if( nbCellsToTransfer < 0 ){ + +// id_send.emplace_back( nbi ); + +// // Logical_coord_t stencil; +// // { // Compute the stencil or normalized direction to neighbour +// // Coord_t tmp; +// // double n2 = 0; +// // for(int idim = 0; idim( tmp.coord( idim ) / 0.5 ); +// // } + +// // std::cerr << "\t\t> stencil for neighbour # " << nbi_rank << " : "; +// // for(size_t idim=0; idim Number RECV : " << id_send.size() << std::endl; + +// std::vector already_given( n_neighbours, 0 ); + +// for_each_interval( meshes[ m_ ], [&]( std::size_t level, const auto& interval, const auto& index ){ +// Coord_t ibar; + +// std::cerr << "\t\t> Interval [" << interval.start << ", " << interval.end << "[" << std::endl; + +// double dm = 1. / (1 << level ); +// ibar.coord( 0 ) = ( (interval.end - interval.start) * 0.5 + interval.start ) * dm ; +// ibar.coord( 1 ) = ( index( 1 ) * dm ) + dm * 0.5; +// if constexpr ( dim == 3 ) ibar( 2 ) = ( index( 2 ) * dm ) + dm * 0.5; + +// std::cerr << "\t\t\t> level " << level << " center @ {" << ibar.coord(0) << ", " << ibar.coord(1) << "}" << std::endl; + +// // find which neighbour will potentially receive this interval +// int winner_id = -1; // keep it to current if still negative +// double winner_dist = std::numeric_limits::max(); +// for( int nbi=0; nbi Dist to neighbour #" << neighbour_rank << " : " << dist << " vs " << winner_dist << std::endl; +// std::cerr << "\t\t\t> Already given to this neighbour " << already_given[ neighbour_rank ] << std::endl; +// std::cerr << "\t\t\t> NbCells of this interval " << interval.size() << std::endl; +// std::cerr << "\t\t\t> fluxes for this neighbour : " << all[ m_ ].fluxes[ id_send[ nbi ] ] << std::endl; + +// if( dist < winner_dist && +// already_given[ neighbour_rank ] + interval.size() <= (- all[ m_ ].fluxes[ id_send[ nbi ] ]) ){ + +// winner_id = id_send[ nbi ]; +// winner_dist = dist; +// } + +// } + +// if( winner_id >= 0 ){ +// auto neighbour_rank = all[ m_ ].neighbour[ winner_id ]; +// std::cerr << "\t> Interval given to process " << neighbour_rank << " + ncells : " << interval.size() << std::endl; +// exchanged[ neighbour_rank ][ level ][ index ].add_interval( interval ); +// already_given[ neighbour_rank ] += interval.size(); +// }else{ +// new_meshes[ m_ ][ level ][ index ].add_interval( interval ); +// } + +// }); + +// meshes[ m_ ] = { new_meshes[ m_ ], true }; + +// } + +// for( std::size_t m_ = 0; m_ < meshes.size(); ++m_ ){ // over each domains +// for( int level=global.min_level(); level<=global.max_level(); ++level ) { +// auto intersect = intersection( global[ level ], meshes[ m_ ][ level ]); + +// intersect([&]( auto & i, auto & index ) { +// fake_mpi_rank( level, i, index ) = static_cast( m_ ); +// }); +// } + +// AMesh_t tmp = { exchanged[ m_], true }; +// for( int level=global.min_level(); level<=global.max_level(); ++level ) { +// auto intersect = intersection( global[ level ], tmp[ level ]); + +// intersect([&]( auto & i, auto & index ) { +// fake_mpi_rank( level, i, index ) = static_cast( m_ ); +// }); +// } +// } + +// } diff --git a/include/samurai/load_balancing_diffusion.hpp b/include/samurai/load_balancing_diffusion.hpp new file mode 100644 index 000000000..29947c322 --- /dev/null +++ b/include/samurai/load_balancing_diffusion.hpp @@ -0,0 +1,398 @@ +#pragma once + +#include +#include "load_balancing.hpp" +#include "field.hpp" + +// for std::sort +#include + +#ifdef SAMURAI_WITH_MPI +namespace Load_balancing{ + + class Diffusion : public samurai::LoadBalancer { + + private: + + int _ndomains; + int _rank; + + const double _unbalance_threshold = 0.05; // 5 % + + public: + + Diffusion() { + + #ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); + #else + _ndomains = 1; + _rank = 0; + #endif + } + + inline std::string getName() const { return "diffusion"; } + + template + bool require_balance_impl( Mesh_t & mesh ) { + + boost::mpi::communicator world; + + logs << fmt::format("\n# [Diffusion_LoadBalancer] required_balance_impl ") << std::endl; + + double nbCells_tot = 0; + std::vector nbCellsPerProc; + boost::mpi::all_gather( world, static_cast( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) ), nbCellsPerProc ); + + for(size_t ip=0; ip ( world.size() ); + + for(size_t ip=0; ip _unbalance_threshold ) return true; + } + + return false; + } + + template + auto reordering_impl( Mesh_t & mesh ) { + auto flags = samurai::make_field("diffusion_flag", mesh); + flags.fill( _rank ); + + return flags; + } + + template + std::vector getStencilToNeighbour( const Coord_t & bc_current, const Coord_t & bc_neigh ) const { + + Stencil_t dir_from_neighbour; + std::vector stencils; + + Coord_t tmp; + double n2 = 0.; + for( size_t idim = 0; idim( tmp( idim ) / 0.5 ); + + // FIXME why needed ? + if( std::abs( dir_from_neighbour( idim ) ) > 1 ) { + dir_from_neighbour( idim ) < 0 ? dir_from_neighbour( idim ) = -1 : dir_from_neighbour( idim ) = 1; + } + + if( dir_from_neighbour( idim ) != 0 ) { + Stencil_t dd; + dd.fill(0); + dd(idim) = dir_from_neighbour(idim); + stencils.emplace_back( dd ); + } + } + + return stencils; + } + + template + auto load_balance_impl( Mesh_t & mesh ){ + + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + using CellList_t = typename Mesh_t::cl_type; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + using Coord_t = xt::xtensor_fixed>; + using Stencil = xt::xtensor_fixed>; + + boost::mpi::communicator world; + + // For debug + // std::ofstream logs; + // logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app ); + logs << fmt::format("> New load-balancing using {} ", getName() ) << std::endl; + + std::vector & neighbourhood = mesh.mpi_neighbourhood(); + + /* + * Correction of the list of neighbour process to take into account into the graph when computing + * fluxes that this load-balancing strategy does not exchange in diagonal. Might no longer be necessary + * if stencil are splitted by direction. + */ + // std::vector forceNeighbour; + // { + // std::vector & neighbourhood_tmp = mesh.mpi_neighbourhood(); + + // for( auto & neighbour : neighbourhood_tmp ){ + // auto interface = samurai::cmptInterface( mesh, neighbour.mesh ); + // size_t nintervals = 0; + // for_each_interval(interface, [&]( [[maybe_unused]] size_t level, [[maybe_unused]] const auto & i, [[maybe_unused]] const auto & ii ){ + // nintervals ++; + // }); + // if( nintervals > 0 ){ + // forceNeighbour.emplace_back( neighbour.rank ); + // neighbourhood.emplace_back( neighbour ); + // } + // } + + // logs << "Corrected neighbours : "; + // for(const auto & fn : forceNeighbour ) + // logs << fn << ", "; + // logs << std::endl; + // } + + size_t n_neighbours = neighbourhood.size(); + + // compute fluxes in terms of number of intervals to transfer/receive + // by default, perform 5 iterations + // std::vector fluxes = samurai::cmptFluxes( mesh, forceNeighbour, 5 ); + std::vector fluxes = samurai::cmptFluxes( mesh, 5 ); + + std::vector new_fluxes( fluxes ); + + // get loads from everyone + std::vector loads; + int my_load = static_cast( samurai::cmptLoad( mesh ) ); + boost::mpi::all_gather( world, my_load, loads ); + + { // some debug info + logs << "load : " << my_load << std::endl; + logs << "nneighbours : " << n_neighbours << std::endl; + logs << "neighbours : "; + for( size_t in=0; in cl_to_send( n_neighbours ); + + // set field "flags" for each rank. Initialized to current for all cells (leaves only) + auto flags = samurai::make_field("diffusion_flag", mesh); + flags.fill( world.rank() ); + + // load balancing order + std::vector order( n_neighbours ); + { + for( size_t i=0; i= 0 ) break; + + logs << fmt::format("\t> Working on neighbour # {}", neighbourhood[ neighbour_local_id ].rank ) << std::endl; + + // move the interface in the direction of "the center of mass" of the domain + // we basically want to move based on the normalized cartesian axis + // + // Q?: take into account already given intervals to compute BC ? + // (no weight here) + Coord_t bc_current = samurai::_cmpCellBarycenter( mesh[ mesh_id_t::cells ] ); + Coord_t bc_neighbour = samurai::_cmpCellBarycenter( neighbourhood[ neighbour_local_id ].mesh[ mesh_id_t::cells ] ); + + // Compute normalized direction to neighbour + // Stencil dir_from_neighbour; + std::vector dirs = getStencilToNeighbour( bc_current, bc_neighbour ); + // { + // auto aa_ = getStencilToNeighbour( bc_current, bc_neighbour ); + + // logs << fmt::format("\t> Number of stencils: {}", aa_.size()) << std::endl; + // for( const auto & st : aa_ ) { + // logs << fmt::format("\t\t> Stencil : ({},{})", st(0), st(1) ) << std::endl; + // } + + // Coord_t tmp; + // double n2 = 0.; + // for( size_t idim = 0; idim( tmp( idim ) / 0.5 ); + + // // FIXME why needed ? + // if( std::abs( dir_from_neighbour( idim ) ) > 1 ) { + // dir_from_neighbour( idim ) < 0 ? dir_from_neighbour( idim ) = -1 : dir_from_neighbour( idim ) = 1; + // } + // } + + // // Avoid diagonals exchange, and emphaze x-axis. Maybe two phases propagation in case of diagonal ? + // // i.e.: if (1, 1) -> (1, 0) then (0, 1) ? + + // if constexpr ( Mesh_t::dim == 2 ) { + // if( std::abs(dir_from_neighbour[0]) == 1 && std::abs( dir_from_neighbour[1]) == 1 ){ + // // dir_from_neighbour[0] = 1; + // dir_from_neighbour[1] = 0; + // } + // } + + // if constexpr ( Mesh_t::dim == 3 ) { + // if( std::abs(dir_from_neighbour[0]) == 1 && std::abs( dir_from_neighbour[1]) == 1 && std::abs(dir_from_neighbour[2]) == 1){ + // dir_from_neighbour[1] = 0; + // dir_from_neighbour[2] = 0; + // } + // } + + // logs << fmt::format("\t\t> (corrected) stencil for this neighbour # {} :", neighbourhood[ neighbour_local_id ].rank); + // for(size_t idim=0; idim( mesh, neighbourhood[ neighbour_local_id ].mesh, dir_from_neighbour ); + + bool empty = false; + { + size_t iii = 0; + samurai::for_each_interval( interface, [&]( [[maybe_unused]] size_t level, [[maybe_unused]] const auto & i, [[maybe_unused]] const auto & ii ){ + iii ++; + }); + if( iii == 0 ) empty = true; + } + + if( empty ) { + logs << "\t> Skipping neighbour, empty interface ! " << std::endl; + continue; + } + + { + int nCellsAtInterfaceGiven = 0, nCellsAtInterface = 0; + for (size_t level = mesh.min_level(); level <= mesh.max_level(); ++level) { + + auto intersect = samurai::intersection( interface[ interface.min_level() ], mesh[ mesh_id_t::cells ][ level ] ).on( level ); // need handle level difference here ! + intersect( [&]( [[maybe_unused]] const auto & interval, [[maybe_unused]] const auto & index ){ + for(size_t ii=0; ii NCellsAtInterface : {}, NCellsAtInterfaceGiven : {}", nCellsAtInterface, nCellsAtInterfaceGiven ) << std::endl; + } + + + // propagate until full-fill neighbour + { + size_t nbElementGiven = 1; // validate the while condition on starter + + logs << fmt::format("\t\t\t> Propagate for neighbour rank # {}", neighbourhood[ neighbour_local_id ].rank) << std::endl; + + int offset = 1; + while( new_fluxes[ neighbour_local_id ] < 0 && nbElementGiven > 0 ){ + + // intersection of interface with current mesh + size_t minLevelInInterface = mesh.max_level(); + + { + auto interface_on_mesh = samurai::translate( interface[ interface.min_level() ], dir_from_neighbour * offset ); // interface is monolevel ! + for (size_t level = mesh.min_level(); level <= mesh.max_level(); ++level) { + size_t nIntervalAtInterface = 0; + auto intersect = samurai::intersection( interface_on_mesh, mesh[ mesh_id_t::cells ][ level ] ).on( level ); // need handle level difference here ! + intersect( [&]( [[maybe_unused]] const auto & interval, [[maybe_unused]] const auto & index ){ + nIntervalAtInterface += 1; + }); + + if( nIntervalAtInterface > 0 ) minLevelInInterface = std::min( minLevelInInterface, level ); + } + } + + if( minLevelInInterface != interface.min_level() ) { + logs << "\t\t\t\t> [WARNING] Interface need to be update !" << std::endl; + } + + logs << fmt::format("\t\t\t\t> Min level in interface : {}", minLevelInInterface ) << std::endl; + + // CellList_t cl_given; + + nbElementGiven = 0; + + auto interface_on_mesh = translate( interface[ interface.min_level() ], dir_from_neighbour * offset ); // interface is monolevel ! + for (size_t level = mesh.min_level(); level <= mesh.max_level(); ++level) { + + size_t nCellsAtInterface = 0, nCellsAtInterfaceGiven = 0; + auto intersect = intersection( interface_on_mesh, mesh[ Mesh_t::mesh_id_t::cells ][ level ] ).on( level ); // need handle level difference here ! + + intersect( [&]( const auto & interval, const auto & index ){ + + for(size_t ii=0; ii At level {}, NCellsAtInterface : {}, NCellsAtInterfaceGiven : {}, nbElementGiven : {}", level, + nCellsAtInterface, nCellsAtInterfaceGiven, nbElementGiven ) << std::endl; + + nbElementGiven += nCellsAtInterfaceGiven; + + } + + // interface = { cl_given, false }; + + new_fluxes[ neighbour_local_id ] += static_cast( nbElementGiven ); + offset ++; + } + + } + + if( new_fluxes[ neighbour_local_id ] < 0 ){ + logs << fmt::format("\t> Error cannot fullfill the neighbour # {}, fluxes: {} ", neighbourhood[ neighbour_local_id ].rank, new_fluxes[ neighbour_local_id ] ) << std::endl; + } + + } + + } + + return flags; + } + + }; +} +#endif diff --git a/include/samurai/load_balancing_diffusion_interval.hpp b/include/samurai/load_balancing_diffusion_interval.hpp new file mode 100644 index 000000000..ca2a3acdb --- /dev/null +++ b/include/samurai/load_balancing_diffusion_interval.hpp @@ -0,0 +1,459 @@ +#pragma once + +#include +#include "load_balancing.hpp" + +#ifdef SAMURAI_WITH_MP +template +class Diffusion_LoadBalancer_interval : public samurai::LoadBalancer> { + + using Coord_t = xt::xtensor_fixed>; + + private: + int _ndomains; + int _rank; + + /** + * Compute the barycenter of the current domain based on mid-center of intervals. + * Weighted by level. + * + */ + template + Coord_t _cmpIntervalBarycenter( Mesh_t & mesh ) { // compute barycenter of current domains ( here, all domains since with simulate multiple MPI domains) + + Coord_t bary; + bary.fill( 0. ); + + double wght_tot = 0.; + samurai::for_each_interval( mesh, + [&]( std::size_t level, const auto& interval, const auto& index ) { + + // [OPTIMIZATION] precompute weight as array + // double wght = 1. / ( 1 << ( mesh.max_level() - level ) ); + constexpr double wght = 1.; + + Coord_t mid = _getIntervalMidPoint( level, interval, index ); + + for(size_t idim=0; idim + inline Coord_t _getIntervalMidPoint( size_t level, const Interval_t & interval, + const Index_t & index ) const { + Coord_t mid; + double csize = samurai::cell_length( level ); + + mid( 0 ) = ( (interval.end - interval.start) * 0.5 + interval.start ) * csize ; + for( size_t idim=0; idim + Mesh_t load_balance_impl( Mesh_t & mesh ){ + + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = samurai::CellArray; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + boost::mpi::communicator world; + + // For debug + std::ofstream logs; + logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app ); + logs << "# New load balancing" << std::endl; + + // compute fluxes in terms of number of intervals to transfer/receive + std::vector fluxes = samurai::cmptFluxes( mesh ); + std::vector new_fluxes( fluxes ); + + // get loads from everyone + std::vector loads; + int my_load = static_cast( samurai::cmptLoad( mesh ) ); + boost::mpi::all_gather( world, my_load, loads ); + + std::vector & neighbourhood = mesh.mpi_neighbourhood(); + size_t n_neighbours = neighbourhood.size(); + + { // some debug info + logs << "load : " << my_load << std::endl; + logs << "nneighbours : " << n_neighbours << std::endl; + logs << "neighbours : "; + for( size_t in=0; in cl_to_send( n_neighbours ); + + bool balancing_done = false; + while( ! balancing_done ){ + + // select neighbour with the highest needs of load + bool neighbour_found = false; + std::size_t requester = 0; + int requested_load = 0; + for(std::size_t nbi=0; nbi= 0 ) continue; + + // FIX: Add condition (&& interface not empty ) ? + // Neighbourhood should have been updated in a way that an interface exists ! + if( - new_fluxes[ nbi ] > requested_load ){ + requested_load = - new_fluxes[ nbi ]; + requester = nbi; + neighbour_found = true; + } + } + + if( ! neighbour_found ){ + { // debug + logs << "No more neighbour found " << std::endl; + } + + balancing_done = true; + break; + } + + { // debug + logs << "Requester neighbour : " << neighbourhood[ requester ].rank << ", fluxes : " << requested_load << std::endl; + } + + // select interval for this neighbour by moving in cartesian direction by one + auto interface = samurai::cmptInterface( mesh, neighbourhood[ requester ].mesh ); + + { // check emptyness of interface, if it is empty, then set fluxes for this neighbour to 0 + size_t nelement = 0; + samurai::for_each_interval( interface, [&]( [[maybe_unused]] std::size_t level, [[maybe_unused]] const auto & interval, + [[maybe_unused]] const auto & index ){ + nelement += 1; + }); + + if( nelement == 0 ){ + new_fluxes[ requester ] = 0; + + { // debug + std::cerr << fmt::format("\t> Process {}, Warning no interface with a requester neighbour # {}", world.rank(), + neighbourhood[ requester ].rank ) << std::endl; + logs << "Requester neighbour, no interface found, set fluxes to " << new_fluxes[ requester ] << std::endl; + } + }else{ + { // debug + logs << "Requester neighbour, interface " << nelement << " intervals " << std::endl; + } + } + } + + // go through interval on the interface and add as much as possible + // skip this to add the whole interface + CellList_t cl_for_neighbour; + + size_t nbIntervalAdded = 0; + samurai::for_each_interval( interface, [&]( std::size_t level, const auto & interval, const auto & index ){ + + if( new_fluxes[ requester ] < 0 ){ + cl_for_neighbour[ level ][ index ].add_interval( interval ); + // new_fluxes[ requester ] += 1; // here interval load == 1, no weight; + nbIntervalAdded += 1; + } + + }); + + new_fluxes[ requester ] += nbIntervalAdded; + + { // remove interval from current process mesh and add it to neighbour mesh local copy ! + CellArray_t ca_for_neighbour = { cl_for_neighbour, false }; + + // mesh.remove( ca_for_neighbour ); + // neighbourhood[ requester ].mesh.merge( ca_for_neighbour ); + + // update gobal list that will be sent to neighbour process + samurai::for_each_interval( ca_for_neighbour, [&]( std::size_t level, const auto & interval, const auto & index ){ + cl_to_send[ requester ][ level ][ index ].add_interval( interval ); + }); + } + + { // debug + logs << "New flux for this neighbour : " << new_fluxes[ requester ] << std::endl; + } + + } + + /* ---------------------------------------------------------------------------------------------------------- */ + /* ------- Data transfer between processes ------------------------------------------------------------------ */ + /* ---------------------------------------------------------------------------------------------------------- */ + + CellList_t new_cl, need_remove; + + // at this point local mesh of neighbour are modified ( technically it should match what would results from send) + // and local mesh has been modified + for(std::size_t ni=0; ni 0 ) { // receive data + CellArray_t to_rcv; + world.recv( neighbourhood[ ni ].rank, 42, to_rcv ); + + logs << fmt::format("\t>[load_balance_impl]::interval Rank # {} receiving {} cells from rank # {}", world.rank(), to_rcv.nb_cells(), neighbourhood[ ni ].rank) << std::endl; + + // old strategy + // mesh.merge( to_rcv ); + + // add to future mesh of current process + samurai::for_each_interval(to_rcv, + [&](std::size_t level, const auto& interval, const auto& index) + { + new_cl[ level ][ index ].add_interval( interval ); + }); + + }else{ // send data to + CellArray_t to_send = { cl_to_send[ ni ], false }; + world.send( neighbourhood[ ni ].rank, 42, to_send ); + + logs << fmt::format("\t>[load_balance_impl]::interval Rank # {} sending {} cells to rank # {}", world.rank(), to_send.nb_cells(), neighbourhood[ ni ].rank) << std::endl; + + samurai::for_each_interval( to_send, + [&](std::size_t level, const auto& interval, const auto& index) + { + need_remove[ level ][ index ].add_interval( interval ); + }); + } + + } + + /* ---------------------------------------------------------------------------------------------------------- */ + /* ------- Construct new mesh for current process ----------------------------------------------------------- */ + /* ---------------------------------------------------------------------------------------------------------- */ + + // add to new_cl interval that were not sent + CellArray_t need_remove_ca = { need_remove }; // to optimize + for( size_t level=mesh.min_level(); level<=mesh.max_level(); ++level ){ + auto diff = samurai::difference( mesh[ mesh_id_t::cells ][ level ], need_remove_ca[ level ] ); + + diff([&]( auto & interval, auto & index ){ + new_cl[ level ][ index ].add_interval( interval ); + }); + } + + Mesh_t new_mesh( new_cl, mesh ); + + return new_mesh; + } + + template + [[deprecated]] + void load_balance_impl2( Mesh_t & mesh ){ + + using interval_t = typename Mesh_t::interval_t; + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + using CellList_t = typename Mesh_t::cl_type; + using CellArray_t = samurai::CellArray; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + boost::mpi::communicator world; + + std::ofstream logs; + // DEBUG + logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app ); + logs << "# New load balancing" << std::endl; + + std::string smt = "# " + std::to_string( _rank ) + " [Diffusion_LoadBalancer_interval]::load_balance_impl "; + SAMURAI_TRACE( smt + "Entering function ..." ); + + // give access to rank & mesh of neighbour + std::vector & neighbourhood = mesh.mpi_neighbourhood(); + size_t n_neighbours = neighbourhood.size(); + + // get the load to neighbours (geometrical neighbour) + std::vector fluxes = samurai::cmptFluxes( mesh ); + + { + logs << "load : " << samurai::cmptLoad( mesh ) << std::endl; + logs << "nneighbours : " << n_neighbours << std::endl; + logs << "neighbours : "; + for( size_t in=0; in( mesh ); + + // compute some point of reference in mesh and interval-based interface + // Coord_t barycenter = _cmpIntervalBarycenter( mesh[ mesh_id_t::cells ] ); + Coord_t barycenter = samurai::_cmpCellBarycenter( mesh[ mesh_id_t::cells ] ); + logs << "Domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl; + + std::vector invloads; + double my_load = static_cast( samurai::cmptLoad( mesh ) ); + boost::mpi::all_gather( world, my_load, invloads ); + for(size_t il=0; il barycenter_interface_neighbours( n_neighbours ); + std::vector barycenter_neighbours( n_neighbours ); + + for(size_t nbi=0; nbi( interface[ nbi ] ); + barycenter_neighbours[ nbi ] = samurai::_cmpCellBarycenter( neighbourhood[ nbi ].mesh[ mesh_id_t::cells ] ); + + // debug + // auto s_ = fmt::format( "Barycenter neighbour : ({}, {})", + // barycenter_interface_neighbours[ nbi ]( 0 ), + // barycenter_interface_neighbours[ nbi ]( 1 ) ); + + // logs << s_ << std::endl; + } + + struct Data { + size_t level; + interval_t interval; + xt::xtensor_fixed> indices; + int rank; + }; + + // build map of interval that needs to be sent + std::multimap repartition; + + constexpr auto fdist = samurai::Distance_t::GRAVITY; + + for_each_interval( mesh[ Mesh_t::mesh_id_t::cells ], + [&]( std::size_t level, const auto& interval, const auto& index ){ + + // cartesian coordinates + auto mid_point = _getIntervalMidPoint( level, interval, index ); + + // process that might get the interval + int winner_id = -1; + // double winner_dist = std::numeric_limits::max(); + double winner_dist = samurai::getDistance( mid_point, barycenter ) * invloads[ _rank ]; + + // select the neighbour + for( size_t ni=0; ni= 0 ) continue; // skip neighbour that will recv + + // this might fix ilots but require neighbour update + // double dist = std::min( distance_inf( mid_point, barycenter_interface_neighbours[ ni ] ), + // distance_inf( mid_point, barycenter_neighbours[ ni ] ) ); + + double dist = samurai::getDistance( mid_point, barycenter_neighbours[ ni ] ) * invloads[ neighbour_rank ] ; + // double dist = samurai::distance_inf( mid_point, barycenter_interface_neighbours[ ni ] ); + + if( dist < winner_dist ){ + winner_id = ni; + winner_dist = dist; + } + + } + + if( winner_id >= 0 ){ + + if( repartition.find( winner_dist ) != repartition.find( winner_dist ) ){ + std::cerr << "\t> WARNING: Key conflict for std::map !" << std::endl; + } + + repartition.insert ( std::pair( winner_dist, Data { level, interval, index, winner_id } ) ); + + } + + }); + + std::vector cl_to_send( n_neighbours ); + std::vector ca_to_send( n_neighbours ); + + // distribute intervals based on ordered distance to neighbours + // rank is not the rank but the offset in the neighbourhood + std::vector given_ ( n_neighbours, 0 ); + // for( auto & it : repartition ){ + for( auto it = repartition.begin(); it != repartition.end(); it++ ){ + + auto nrank = neighbourhood[ it->second.rank ].rank; + auto lvl = it->second.level; + + logs << fmt::format("\t> Interval {} --> to rank {} ( level : {} , dist : {}", it->second.interval, nrank, lvl, it->first) << std::endl; + + // shouldn't we give it to the second closest neighbour ?! + if( given_[ it->second.rank ] + it->second.interval.size() <= ( - fluxes[ it->second.rank ] ) ){ + cl_to_send[ it->second.rank ][ it->second.level ][ it->second.indices ].add_interval( it->second.interval ); + given_[ it->second.rank ] += it->second.interval.size(); + } + + } + + for(size_t nbi=0; nbi Number of cells to send to process # " << neighbourhood[ nbi ].rank + << " : " << ca_to_send[ nbi ].nb_cells() << std::endl; + } + + // actual data transfer occurs here + for(size_t ni=0; ni 0 ) { // receive data + samurai::CellArray to_rcv; + world.recv( neighbourhood[ ni ].rank, 42, to_rcv ); + mesh.merge( to_rcv ); + }else{ // send data to + world.send( neighbourhood[ ni ].rank, 42, ca_to_send[ ni ] ); + mesh.remove( ca_to_send[ ni ] ); + } + } + + // update neighbour mesh + mesh.update_mesh_neighbour(); + } + +}; +#endif diff --git a/include/samurai/load_balancing_force.hpp b/include/samurai/load_balancing_force.hpp new file mode 100644 index 000000000..7d9ff71ef --- /dev/null +++ b/include/samurai/load_balancing_force.hpp @@ -0,0 +1,511 @@ +#pragma once + +#include +#include "load_balancing.hpp" + +#ifdef SAMURAI_WITH_MPI +namespace Load_balancing{ + + class GlobalCriteria : public samurai::LoadBalancer { + + private: + int _ndomains; + int _rank; + + const double _unbalance_threshold = 0.05; // 5 % + + template + double getSurfaceOrVolume( Mesh_t & mesh ) const { + + double s_ = 0.; + for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){ + + double s = 1.; + + for(size_t idim=0; idim + bool require_balance_impl( Mesh_t & mesh ) { + + boost::mpi::communicator world; + + // logs << fmt::format("\n# [SFC_LoadBalancer_interval::Morton] required_balance_impl ") << std::endl; + + double nbCells_tot = 0; + std::vector nbCellsPerProc; + boost::mpi::all_gather( world, static_cast( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) ), nbCellsPerProc ); + + for(size_t ip=0; ip ( world.size() ); + + for(size_t ip=0; ip _unbalance_threshold ) return true; + } + + return false; + } + + template + auto reordering_impl( Mesh_t & mesh ){ + auto flags = samurai::make_field("rank", mesh); + flags.fill( _rank ); + return flags; + } + + template + auto load_balance_impl( Mesh_t & mesh ){ + + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + using Coord_t = xt::xtensor_fixed>; + + boost::mpi::communicator world; + + // For debug purpose + std::ofstream logs; + logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app ); + logs << "# New load balancing" << std::endl; + + // give access to rank & mesh of neighbour + std::vector & neighbourhood = mesh.mpi_neighbourhood(); + + std::size_t n_neighbours = neighbourhood.size(); + + std::vector loads; + double my_load = static_cast( samurai::cmptLoad( mesh ) ); + boost::mpi::all_gather( world, my_load, loads ); + + // get the load to neighbours (geometrical neighbour) with 5 iterations max + std::vector fluxes = samurai::cmptFluxes( mesh, 1 ); + + { + logs << "load : " << my_load << std::endl; + logs << "nneighbours : " << n_neighbours << std::endl; + logs << "neighbours : "; + for( size_t in=0; in + auto interface = samurai::_computeCartesianInterface( mesh ); + + // Invalidate fluxes with non adjacents neighbours, should not happen or might happen if load balancing exchange does not consider some + // direction. + std::vector ncells_interface ( n_neighbours, 0 ); + for(size_t ni=0; ni( mesh ); + logs << "Domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl; + + // std::vector barycenter_interface_neighbours( n_neighbours ); + std::vector barycenter_neighbours( n_neighbours ); + std::vector sv( n_neighbours ); + + for(size_t nbi=0; nbi( interface[ nbi ] ); + barycenter_neighbours[ nbi ] = samurai::_cmpCellBarycenter( neighbourhood[ nbi ].mesh ); + + // surface or volume depending on Mesh_t::dim + sv[ nbi ] = getSurfaceOrVolume( neighbourhood[ nbi ].mesh ); + + // debug + auto s_ = fmt::format( "Barycenter neighbour # {}: ({}, {})", neighbourhood[ nbi ].rank, + barycenter_neighbours[ nbi ]( 0 ), + barycenter_neighbours[ nbi ]( 1 ) ); + + logs << s_ << std::endl; + } + + // build map of interval that needs to be sent. Warning, it does not work with classical std::map !! + auto flags = samurai::make_field("rank", mesh); + flags.fill( world.rank() ); + + constexpr auto fdist = samurai::Distance_t::L1; + + double currentSV = getSurfaceOrVolume( mesh ); + + std::vector mload( static_cast( world.size() ), 0 ); + for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){ + + auto cc = cell.center(); + + // process that might get the interval + int winner_id = -1; + + // double winner_dist = std::numeric_limits::max(); + // double winner_dist = samurai::getDistance( cell, barycenter ) / loads[ world.rank() ]; + + // double coeff_current = currentSV / loads[ static_cast( world.rank() ) ]; + double coeff_current = 1.; // loads[ static_cast( world.rank() ) ]; + double winner_dist = samurai::getDistance( cc, barycenter ) * coeff_current; + + // logs << fmt::format("\t\t> Cell : ({},{}), current mesh dist : {}", cc(0), cc(1), winner_dist ) << std::endl; + + // select the neighbour + for( std::size_t ni=0; ni( neighbourhood[ ni ].rank ); + + if( fluxes[ ni ] >= 0 ) continue; // skip neighbour that will recv + + // this might fix ilots but require neighbour update + // double dist = std::min( distance_inf( mid_point, barycenter_interface_neighbours[ ni ] ), + // distance_inf( mid_point, barycenter_neighbours[ ni ] ) ); + + // double dist = samurai::getDistance( cell, barycenter_interface_neighbours[ ni ] ) / loads[ neighbour_rank ]; + // double coeff = sv[ ni ] / ( loads[ neighbour_rank ] ) ; // / sv[ ni ]; + double coeff = 1.; // loads[ neighbour_rank ] ; // mload[ neighbour_rank ]; + double dist = samurai::getDistance( cell.center(), barycenter_neighbours[ ni ] ) * coeff; + + // logs << fmt::format("\t\t\t> Dist to neighbour {} : {}", neighbour_rank, dist ) << std::endl; + + if( dist < winner_dist && ncells_interface[ ni ] > 0 ){ + winner_id = static_cast( ni ); + winner_dist = dist; + + mload[ neighbour_rank ] += 1; + + // if( mload[ neighbour_rank ] >= ( - fluxes[ ni ] ) ) fluxes[ ni ] = 0; + } + + } + + // logs << fmt::format("\t\t\t> Cell given to process #{}", neighbourhood[ static_cast( winner_id ) ].rank ) << std::endl; + + if( winner_id >= 0 ){ + assert( winner_id < neighbourhood.size() ); + flags[ cell ] = neighbourhood[ static_cast( winner_id ) ].rank; + } + + }); + + return flags; + } + + template + xt::xtensor_fixed> _barycenterWithFlags( const Mesh_t & mesh, const Field_t & flags ) + { + using Coord_t = xt::xtensor_fixed>; + + Coord_t bary; + bary.fill(0.); + + double wght_tot = 0.; + samurai::for_each_cell(mesh, [&, this]( const auto & cell ) { + double wght = 1.; + + if constexpr( w == samurai::Weight::OnSmall ){ + wght = 1. / static_cast( 1 << (mesh.max_level() - cell.level) ); + } + + if constexpr( w == samurai::Weight::OnLarge ){ + wght = 1. / static_cast( 1 << cell.level ); + } + + if( flags[ cell ] == this->_rank ) { + auto cc = cell.center(); + + for (size_t idim = 0; idim < dim; ++idim) + { + bary(idim) += cc(idim) * wght; + } + + wght_tot += wght; + } + + }); + + wght_tot = std::max(wght_tot, 1e-12); + for (size_t idim = 0; idim < dim; ++idim) + { + bary(idim) /= wght_tot; + } + + return bary; + } + + template + xt::xtensor_fixed> _barycenterWithMeshWithFlags( const Mesh_t & mesh, const Field_t & flags, int rank, const Mesh_t & otherMesh ) + { + using Coord_t = xt::xtensor_fixed>; + + Coord_t bary; + bary.fill(0.); + + double wght_tot = 0.; + samurai::for_each_cell(mesh, [&]( const auto & cell ) { + double wght = 1.; + + if constexpr( w == samurai::Weight::OnSmall ){ + wght = 1. / static_cast( 1 << (mesh.max_level() - cell.level) ); + } + + if constexpr( w == samurai::Weight::OnLarge ){ + wght = 1. / static_cast( 1 << cell.level ); + } + + if( flags[ cell ] == rank ) { + auto cc = cell.center(); + + for (size_t idim = 0; idim < dim; ++idim) + { + bary(idim) += cc(idim) * wght; + } + + wght_tot += wght; + } + + }); + + samurai::for_each_cell(otherMesh, [&]( const auto & cell ) { + double wght = 1.; + + if constexpr( w == samurai::Weight::OnSmall ){ + wght = 1. / static_cast( 1 << (mesh.max_level() - cell.level) ); + } + + if constexpr( w == samurai::Weight::OnLarge ){ + wght = 1. / static_cast( 1 << cell.level ); + } + + auto cc = cell.center(); + + for (size_t idim = 0; idim < dim; ++idim) + { + bary(idim) += cc(idim) * wght; + } + + wght_tot += wght; + + }); + + wght_tot = std::max(wght_tot, 1e-12); + for (size_t idim = 0; idim < dim; ++idim) + { + bary(idim) /= wght_tot; + } + + return bary; + } + + template + auto load_balance_impl2( Mesh_t & mesh ){ + + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + using Coord_t = xt::xtensor_fixed>; + + boost::mpi::communicator world; + + logs << "# New load balancing" << std::endl; + + // build map of interval that needs to be sent. Warning, it does not work with classical std::map !! + auto flags = samurai::make_field("rank", mesh); + flags.fill( world.rank() ); + + // give access to rank & mesh of neighbour + std::vector & neighbourhood = mesh.mpi_neighbourhood(); + + std::size_t n_neighbours = neighbourhood.size(); + + std::vector loads; + double my_load = static_cast( samurai::cmptLoad( mesh ) ); + boost::mpi::all_gather( world, my_load, loads ); + + // Compute fluxes based on neighbourdhood graph + std::vector fluxes = samurai::cmptFluxes( mesh, 1 ); + + { + logs << "load : " << my_load << std::endl; + logs << "nneighbours : " << n_neighbours << std::endl; + logs << "neighbours : "; + for( size_t in=0; in + auto interface = samurai::_computeCartesianInterface( mesh ); + + // Invalidate fluxes with non adjacents neighbours, should not happen or might happen if load balancing exchange does not consider some + // direction or neighbourdhood is overkill (all meshes for example) + std::vector ncells_interface ( n_neighbours, 0 ); + for(size_t ni=0; ni( mesh, flags ); + logs << "Domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl; + + // std::vector barycenter_interface_neighbours( n_neighbours ); + std::vector barycenter_neighbours( n_neighbours ); + + for(size_t nbi=0; nbi( mesh, flags, neighbourhood[ nbi ].rank, neighbourhood[ nbi ].mesh ); + + // debug + auto s_ = fmt::format( "Barycenter neighbour # {}: ({}, {})", neighbourhood[ nbi ].rank, + barycenter_neighbours[ nbi ]( 0 ), + barycenter_neighbours[ nbi ]( 1 ) ); + + logs << s_ << std::endl; + } + + constexpr auto fdist = samurai::Distance_t::L1; + + double currentSV = getSurfaceOrVolume( mesh ); + + std::vector mload( static_cast( world.size() ), 0 ); + for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){ + + auto cc = cell.center(); + + // process that might get the interval + int winner_id = -1; + double winner_dist = samurai::getDistance( cc, barycenter ) / currentSV; + + // logs << fmt::format("\t\t> Cell : ({},{}), current mesh dist : {}", cc(0), cc(1), winner_dist ) << std::endl; + + // select the neighbour + for( std::size_t ni=0; ni( neighbourhood[ ni ].rank ); + + if( fluxes[ ni ] >= 0 ) continue; // skip neighbour that will recv + + double dist = samurai::getDistance( cell.center(), barycenter_neighbours[ ni ] ); + + // logs << fmt::format("\t\t\t> Dist to neighbour {} : {}", neighbour_rank, dist ) << std::endl; + + if( dist < winner_dist && ncells_interface[ ni ] > 0 ){ + winner_id = static_cast( ni ); + winner_dist = dist; + } + + } + + if( winner_id >= 0 ){ + assert( winner_id < neighbourhood.size() ); + flags[ cell ] = neighbourhood[ static_cast( winner_id ) ].rank; + + mload[ neighbourhood[ winner_id ].rank ] ++; + } + + }); + + // update barycenter + barycenter = _barycenterWithFlags( mesh, flags ); + logs << "Updated domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl; + for(size_t nbi=0; nbi( mesh, flags, neighbourhood[ nbi ].rank, neighbourhood[ nbi ].mesh ); + + // debug + auto s_ = fmt::format( "Updated Barycenter neighbour # {}: ({}, {})", neighbourhood[ nbi ].rank, + barycenter_neighbours[ nbi ]( 0 ), + barycenter_neighbours[ nbi ]( 1 ) ); + + logs << s_ << std::endl; + } + + // second pass + for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){ + + auto cc = cell.center(); + + // process that might get the interval + int winner_id = -1; + double winner_dist = samurai::getDistance( cc, barycenter ) / currentSV; + + // logs << fmt::format("\t\t> Cell : ({},{}), current mesh dist : {}", cc(0), cc(1), winner_dist ) << std::endl; + + // select the neighbour + for( std::size_t ni=0; ni( neighbourhood[ ni ].rank ); + + if( fluxes[ ni ] + mload[ ni ] >= 0 ) continue; // skip neighbour that will recv + + double dist = samurai::getDistance( cell.center(), barycenter_neighbours[ ni ] ); + + // logs << fmt::format("\t\t\t> Dist to neighbour {} : {}", neighbour_rank, dist ) << std::endl; + + if( dist < winner_dist && ncells_interface[ ni ] > 0 ){ + winner_id = static_cast( ni ); + winner_dist = dist; + } + + } + + if( winner_id >= 0 ){ + assert( winner_id < neighbourhood.size() ); + flags[ cell ] = neighbourhood[ static_cast( winner_id ) ].rank; + + mload[ neighbourhood[ winner_id ].rank ] ++; + } + + }); + + for( std::size_t ni=0; ni +#include "load_balancing.hpp" +#include + +// for std::sort +#include +#ifdef SAMURAI_WITH_MPI +namespace Load_balancing{ + + class Life : public samurai::LoadBalancer { + + private: + + int _ndomains; + int _rank; + + public: + + Life() { + + #ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); + #else + _ndomains = 1; + _rank = 0; + #endif + } + + inline std::string getName() const { return "life"; } + + template + auto reordering_impl( Mesh_t & mesh ) { + auto flags = samurai::make_field("ordering_flag", mesh); + flags.fill( _rank ); + + return flags; + } + + template + auto load_balance_impl( Mesh_t & mesh ){ + + using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t; + // using CellList_t = typename Mesh_t::cl_type; + // using mesh_id_t = typename Mesh_t::mesh_id_t; + + // using Coord_t = xt::xtensor_fixed>; + // using Stencil = xt::xtensor_fixed>; + + boost::mpi::communicator world; + + // For debug + // std::ofstream logs; + // logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app ); + logs << fmt::format("> New load-balancing using {} ", getName() ) << std::endl; + + auto flags = samurai::make_field("balancing_flags", mesh); + flags.fill( _rank ); + + // neighbourhood + std::vector & neighbourhood = mesh.mpi_neighbourhood(); + + // fluxes to each neighbour + std::vector fluxes = samurai::cmptFluxes( mesh, 5 ); + + // cpy that can be modified + std::vector new_fluxes( fluxes ); + + // Loads of each processes + std::vector loads; + int my_load = static_cast( samurai::cmptLoad( mesh ) ); + boost::mpi::all_gather( world, my_load, loads ); + + // samurai::cellExists(); + + return flags; + } + + }; +} +#endif diff --git a/include/samurai/load_balancing_metis.hpp b/include/samurai/load_balancing_metis.hpp new file mode 100644 index 000000000..072a376de --- /dev/null +++ b/include/samurai/load_balancing_metis.hpp @@ -0,0 +1,121 @@ +// Copyright 2018-2024 the samurai's authors +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include + +#include "field.hpp" +#include "load_balancing_utils.hpp" + +namespace samurai::Load_balancing +{ + class Metis : public samurai::LoadBalancer + { + private: + + int _ndomains; + int _rank; + + template + void propagate(const Mesh_t&, const Stencil&, Field_t&, int, int&) const + { + } + + public: + + Metis() + { +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); +#else + _ndomains = 1; + _rank = 0; +#endif + } + + inline std::string getName() const + { + return "metis"; + } + + template + Mesh_t load_balance_impl(Mesh_t& mesh) + { + constexpr std::size_t dim = Mesh_t::dim; + using mesh_id_t = typename Mesh_t::mesh_id_t; + auto graph = build_graph(mesh); + + boost::mpi::communicator world; + + idx_t ndims = dim; + idx_t wgtflag = 1; + idx_t numflag = 0; + idx_t ncon = 1; + idx_t nparts = world.size(); + std::vector tpwgts(nparts, 1.0 / nparts); + real_t ubvec[] = {1.05}; + idx_t options[10] = {0}; + idx_t edgecut = 0; + std::vector part(mesh[mesh_id_t::cells].nb_cells(), 17); + MPI_Comm mpi_comm = MPI_COMM_WORLD; + + // ParMETIS_V3_PartKway(global_index_offset.data(), + // xadj.data(), + // adjncy.data(), + // NULL, + // NULL, // adjwgt.data(), + // &wgtflag, + // &numflag, + // &ncon, + // &nparts, + // tpwgts.data(), + // ubvec, + // options, + // &edgecut, + // part.data(), + // &mpi_comm); + + ParMETIS_V3_PartGeomKway(graph.global_index_offset.data(), + graph.xadj.data(), + graph.adjncy.data(), + NULL, + graph.adjwgt.data(), + &wgtflag, + &numflag, + &ndims, + graph.xyz.data(), + &ncon, + &nparts, + tpwgts.data(), + ubvec, + options, + &edgecut, + part.data(), + &mpi_comm); + + // ParMETIS_V3_AdaptiveRepart + + /* ---------------------------------------------------------------------------------------------------------- */ + /* ------- Construct new mesh for current process ----------------------------------------------------------- */ + /* ---------------------------------------------------------------------------------------------------------- */ + + auto partition_field = make_field("partition", mesh); + std::size_t ipart = 0; + for_each_cell(mesh, + [&](const auto& cell) + { + partition_field[cell] = part[ipart++]; + }); + // std::cout << "coucou" << std::endl; + save("metis_partition", mesh, partition_field); + // std::cout << "coucou 2" << std::endl; + + // Mesh_t new_mesh(new_cl, mesh); + + return mesh; + } + }; +} diff --git a/include/samurai/load_balancing_scotch.hpp b/include/samurai/load_balancing_scotch.hpp new file mode 100644 index 000000000..83f36c6cb --- /dev/null +++ b/include/samurai/load_balancing_scotch.hpp @@ -0,0 +1,126 @@ +// Copyright 2018-2024 the samurai's authors +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include "ptscotch.h" + +#include "field.hpp" +#include "load_balancing.hpp" +#include "load_balancing_utils.hpp" +#include "mesh_interval.hpp" +#include "stencil.hpp" + +using namespace xt::placeholders; + +namespace samurai::Load_balancing +{ + class Scotch : public samurai::LoadBalancer + { + private: + + int _ndomains; + int _rank; + + template + void propagate(const Mesh_t&, const Stencil&, Field_t&, int, int&) const + { + } + + public: + + Scotch() + { +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); +#else + _ndomains = 1; + _rank = 0; +#endif + } + + inline std::string getName() const + { + return "Scotch"; + } + + template + Mesh_t& reordering_impl(Mesh_t& mesh) + { + return mesh; + } + + template + Mesh_t load_balance_impl(Mesh_t& mesh) + { + constexpr std::size_t dim = Mesh_t::dim; + using mesh_id_t = typename Mesh_t::mesh_id_t; + auto graph = build_graph(mesh); + + boost::mpi::communicator world; + SCOTCH_Dgraph grafdat; + SCOTCH_dgraphInit(&grafdat, MPI_COMM_WORLD); + + // std::cout << "xadj: " << xt::adapt(graph.xadj) << std::endl; + // std::cout << "adjnct: " << xt::adapt(graph.adjncy) << std::endl; + SCOTCH_dgraphBuild(&grafdat, // grafdat + 0, // baseval, c-style numbering + graph.xadj.size() - 1, // vertlocnbr, nCells + graph.xadj.size() - 1, // vertlocmax + const_cast(graph.xadj.data()), + nullptr, + + nullptr, // veloloctab, vtx weights + nullptr, // vlblloctab + + graph.adjncy.size(), // edgelocnbr, number of arcs + graph.adjncy.size(), // edgelocsiz + const_cast(graph.adjncy.data()), // edgeloctab + nullptr, // edgegsttab + nullptr // edlotab, edge weights + ); + SCOTCH_dgraphCheck(&grafdat); + SCOTCH_Strat strategy; + SCOTCH_stratInit(&strategy); + + SCOTCH_Arch archdat; + SCOTCH_archInit(&archdat); + // SCOTCH_archCmplt(&archdat, 1); + + std::vector part(mesh[mesh_id_t::cells].nb_cells(), 17); + + SCOTCH_Dmapping mappdat; + SCOTCH_dgraphMapInit(&grafdat, &mappdat, &archdat, part.data()); + + SCOTCH_dgraphPart(&grafdat, + 2, + &strategy, // const SCOTCH_Strat * + part.data() // parttab + ); + SCOTCH_archExit(&archdat); + SCOTCH_stratExit(&strategy); + SCOTCH_dgraphExit(&grafdat); + + /* ---------------------------------------------------------------------------------------------------------- */ + /* ------- Construct new mesh for current process ----------------------------------------------------------- */ + /* ---------------------------------------------------------------------------------------------------------- */ + + auto partition_field = make_field("partition", mesh); + std::size_t ipart = 0; + for_each_cell(mesh[mesh_id_t::cells], + [&](const auto& cell) + { + partition_field[cell] = part[ipart++]; + }); + // std::cout << "coucou" << std::endl; + save("metis_partition", mesh, partition_field); + // std::cout << "coucou 2" << std::endl; + + // Mesh_t new_mesh(new_cl, mesh); + + return mesh; + } + }; +} diff --git a/include/samurai/load_balancing_sfc.hpp b/include/samurai/load_balancing_sfc.hpp new file mode 100644 index 000000000..a1ed85b4f --- /dev/null +++ b/include/samurai/load_balancing_sfc.hpp @@ -0,0 +1,284 @@ +#pragma once + +#include "assertLogTrace.hpp" + +#include "load_balancing.hpp" + +#ifdef SAMURAI_WITH_MPI +template +class SFC_LoadBalancer_interval : public samurai::LoadBalancer> +{ + private: + + SFC_type_t _sfc; + int _ndomains; + int _rank; + + const double _unbalance_threshold = 0.05; // 5 % + + public: + + using samurai::LoadBalancer>::logs; + + SFC_LoadBalancer_interval() + { +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); +#else + _ndomains = 1; + _rank = 0; +#endif + } + + inline std::string getName() const + { + return "SFC_" + _sfc.getName() + "_LB"; + } + + /** + * Re-order cells on MPI processes based on the given SFC curve. This need to be + * called once at the beginning (unless ordering is fixed in partition mesh). + * After, load balancing will exchange data based on SFC order and thus only + * boundaries will be moved. + * + */ + template + auto reordering_impl( Mesh_t & mesh ) { + + boost::mpi::communicator world; + + // For debug + // std::ofstream logs; + // logs.open("log_" + std::to_string(_rank) + ".dat", std::ofstream::app); + logs << "# [SFC_LoadBalancer_interval::Morton] Reordering cells using SFC" << std::endl; + + // SFC 1D key for cells + auto sfc_keys = samurai::make_field( "keys", mesh ); + sfc_keys.fill( 0 ); + + auto flags = samurai::make_field("rank", mesh); + flags.fill( world.rank() ); + + logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl; + + SFC_key_t mink = std::numeric_limits::max(), maxk = std::numeric_limits::min(); + + samurai::for_each_cell(mesh[Mesh_t::mesh_id_t::cells], [&]( const auto & cell ) { + + // this is where things can get nasty, we expect indices to be positive values !! + xt::xtensor_fixed> ijk; + for (size_t idim = 0; idim < dim; ++idim) { + + // FIX need shift to get only positive index + assert( cell.indices( idim ) >= 0 ); + ijk( idim ) = static_cast( cell.indices( idim ) ) << ( mesh.max_level() - cell.level ); + } + + auto key = _sfc.template getKey( ijk ); + + sfc_keys[ cell ] = key; + + mink = std::min( key, mink ); + maxk = std::max( key, maxk ); + + }); + + // Key boundaries of current process - unused for now + std::vector bounds = { mink, maxk }; + logs << "\t\t> Local key bounds [" << bounds[ 0 ] << ", " << bounds[ 1 ] << "]" << std::endl; + + std::vector boundaries; + boost::mpi::all_gather( world, bounds.data(), static_cast( bounds.size() ), boundaries ); + + logs << "\t\t> Global key boundaries ["; + for(const auto & ik : boundaries ) + logs << ik << ","; + logs << "]" << std::endl; + + // Check overlap with previous/next process. Does not mean that there is no overlap, but at least between "adjacent" + // (MPI-1), (MPI+1) there is not overlap found + std::vector boundaries_new( static_cast( world.size() + 1 ) ); + + // find max value for boundaries + SFC_key_t globalMax = boundaries[ 0 ]; + for(size_t ip=0; ip( world.size() ); + boundaries_new[ 0 ] = 0; + for(size_t ip=1; ip Global key evenly distrib boundaries ["; + for(const auto & ik : boundaries_new ) + logs << ik << ","; + logs << "]" << std::endl; + + // distribute cell based on boundaries & sfc key + std::map comm; + samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ) { + auto key = sfc_keys[ cell ]; + + // optimize using bisect - find proc that should have this cell + for( size_t ip=0; ip< static_cast( world.size() ); ++ip ){ + if( key >= boundaries_new[ ip ] && key < boundaries_new[ ip + 1 ] ){ + flags[ cell ] = static_cast( ip ); + + // unique list of process that should be contacted + if( comm.find( static_cast( ip ) ) == comm.end() ){ + comm[ static_cast( ip ) ] = true; + } + + break; + } + } + + }); + + return flags; + } + + template + bool require_balance_impl( Mesh_t & mesh ) { + + boost::mpi::communicator world; + + logs << fmt::format("\n# [SFC_LoadBalancer_interval::Morton] required_balance_impl ") << std::endl; + + double nbCells_tot = 0; + std::vector nbCellsPerProc; + boost::mpi::all_gather( world, static_cast( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) ), nbCellsPerProc ); + + for(size_t ip=0; ip ( world.size() ); + + for(size_t ip=0; ip _unbalance_threshold ) return true; + } + + return false; + } + + template + auto load_balance_impl( Mesh_t & mesh ) + { + + boost::mpi::communicator world; + + // std::ofstream logs; + // logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app ); + logs << fmt::format("\n# [SFC_LoadBalancer_interval::Morton] Load balancing cells ") << std::endl; + + // SFC 1D key for cells + std::map sfc_map; + auto sfc_keys = samurai::make_field( "keys", mesh ); + sfc_keys.fill( 0 ); + + auto flags = samurai::make_field("rank", mesh); + flags.fill( world.rank() ); + + logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl; + + SFC_key_t mink = std::numeric_limits::max(), maxk = std::numeric_limits::min(); + samurai::for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto & cell ) { + + // this is where things can get nasty, we expect indices to be positive values !! + xt::xtensor_fixed> ijk; + for (size_t idim = 0; idim < Mesh_t::dim; ++idim) { + + // FIX need shift to get only positive index + assert( cell.indices( idim ) >= 0 ); + ijk( idim ) = static_cast( cell.indices( idim ) ) << ( mesh.max_level() - cell.level ); + } + + auto key = _sfc.template getKey( ijk ); + + sfc_keys[ cell ] = key; + + if( sfc_map.find( key ) != sfc_map.end() ) { + assert( false ); + std::cerr << fmt::format("Rank # {}, Error computing SFC, index not uniq ! ", world.rank()) << std::endl; + } + + sfc_map[ key ] = cell; + + mink = std::min( mink, key ); + maxk = std::max( maxk, key ); + + }); + + assert( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) == sfc_map.size() ); + + size_t ncells_tot = 0, dc = 0; + std::vector nbCellsPerProc; + std::vector globIdx( static_cast( world.size() + 1 ) ); + std::vector globIdxNew( static_cast( world.size() + 1 ) ); + boost::mpi::all_gather( world, mesh.nb_cells( Mesh_t::mesh_id_t::cells ), nbCellsPerProc ); + + logs << "\t\t> Number of cells : " << mesh.nb_cells( Mesh_t::mesh_id_t::cells ) << std::endl; + + for(size_t i=0; i( world.size() ); ++i){ + globIdx[ i + 1 ] = globIdx[ i ] + nbCellsPerProc[ i ]; + ncells_tot += nbCellsPerProc[ i ]; + } + dc = ncells_tot / static_cast( world.size() ); + + // load balanced globIdx -> new theoretical key boundaries based on number of cells per proc + globIdxNew[ 0 ] = 0; + for(size_t i=0; i( world.size() ); ++i){ + globIdxNew[ i + 1 ] = globIdxNew[ i ] + dc; + } + + { + logs << "\t\t> GlobalIdx : "; + for(const auto & i : globIdx ) + logs << i << ", "; + logs << std::endl; + + logs << "\t\t> GlobalIdx balanced : "; + for(const auto & i : globIdxNew ) + logs << i << ", "; + logs << std::endl; + + } + + size_t start = 0; + while( globIdx[ static_cast( world.rank() ) ] >= ( start + 1 ) * dc ){ + start ++; + } + + logs << "\t\t> Start @ rank " << start << std::endl; + + size_t count = globIdx[ static_cast( world.rank() ) ]; + for( auto & it : sfc_map ) { + + if( count >= ( start + 1 ) * dc ){ + start ++; + start = std::min( static_cast( world.size() - 1 ) , start ); + logs << "\t\t> Incrementing Start @ rank " << start << ", count " << count << std::endl; + } + + flags[ it.second ] = static_cast( start ); + + count ++; + } + + return flags; + } + +}; +#endif diff --git a/include/samurai/load_balancing_sfc_w.hpp b/include/samurai/load_balancing_sfc_w.hpp new file mode 100644 index 000000000..5b6780e82 --- /dev/null +++ b/include/samurai/load_balancing_sfc_w.hpp @@ -0,0 +1,348 @@ +/** + * + * This class implements SFC-based load balancing. The difference between this class + * and the one in the load_balancing_sfc.hpp is that, here we try to use level based "weight" + * to dispatch cell between processes in addition to the 1D-SFC key. + * + * This is a try to fix the unbalanced due to the difference of load induced by the difference of + * level between two cells. With the traditional (load_balancing_sfc.hpp) strategy even if the + * load, in term of number of cells on a process, is good, unbalanced on computational time may + * appear. + * + */ +#pragma once + +#include "assertLogTrace.hpp" + +#include "load_balancing.hpp" +#ifdef SAMURAI_WITH_MPI +namespace Load_balancing{ + + template + class SFCw : public samurai::LoadBalancer> + { + private: + + SFC_type_t _sfc; + int _ndomains; + int _rank; + + const double _unbalance_threshold = 0.05; // 5 % + + public: + + using samurai::LoadBalancer>::logs; + + SFCw() + { +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); +#else + _ndomains = 1; + _rank = 0; +#endif + } + + inline std::string getName() const + { + return "SFCw_" + _sfc.getName() + "_LB"; + } + + /** + * Compute weights for cell at each level. We expect that small cell (high level) + * cost more computational power that largest cell (low level). But this is impacted + * by numerical scheme used. As default value, we use power of two starting from levelmin + * (low level). + */ + std::vector getWeights( size_t levelmin, size_t levelmax ) { + // Computing weights based on maxlevel + std::vector weights( levelmax + 1 ); + + for( size_t ilvl=levelmin; ilvl<=levelmax; ++ilvl ){ + // weights[ ilvl ] = ( 1 << ( ilvl - levelmin ) ); // prioritize small cell + weights[ ilvl ] = 1 << (ilvl * ilvl); + // weights[ ilvl ] = 1 << ( levelmax - ilvl ) ; // prioritize large cell + // weights[ ilvl ] = 1.; // all equal + logs << fmt::format("\t\t> Level {}, weight for cell : {}", ilvl, weights[ ilvl ] ) << std::endl; + } + + return weights; + } + + template + bool require_balance_impl( Mesh_t & mesh ) { + + boost::mpi::communicator world; + + logs << fmt::format("\n# [SFCw_LoadBalancer::Morton] required_balance_impl ") << std::endl; + + std::vector weights = getWeights( mesh.min_level(), mesh.max_level() ); + + double load = 0; + samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ){ + load += weights[ cell.level ]; + }); + + std::vector nbLoadPerProc; + boost::mpi::all_gather( world, load, nbLoadPerProc ); + + double load_tot = 0.; + for(size_t i=0; i( world.size() ); + + for(size_t ip=0; ip _unbalance_threshold ) return true; + } + + return false; + } + + /** + * Re-order cells on MPI processes based on the given SFC curve. This need to be + * called once at the beginning (unless ordering is fixed in partition mesh). + * After, load balancing will exchange data based on SFC order and thus only + * boundaries will be moved. + * + */ + template + auto reordering_impl( Mesh_t & mesh ) { + + boost::mpi::communicator world; + + // For debug + // std::ofstream logs; + // logs.open("log_" + std::to_string(_rank) + ".dat", std::ofstream::app); + logs << "# [SFCw_LoadBalancer::Morton] Reordering cells using SFC" << std::endl; + + // SFC 1D key for cells + auto sfc_keys = samurai::make_field( "keys", mesh ); + sfc_keys.fill( 0 ); + + auto flags = samurai::make_field("rank", mesh); + flags.fill( world.rank() ); + + logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl; + + SFC_key_t mink = std::numeric_limits::max(), maxk = std::numeric_limits::min(); + + samurai::for_each_cell(mesh[Mesh_t::mesh_id_t::cells], [&]( const auto & cell ) { + + // this is where things can get nasty, we expect indices to be positive values !! + xt::xtensor_fixed> ijk; + for (size_t idim = 0; idim < dim; ++idim) { + + // FIX need shift to get only positive index + assert( cell.indices( idim ) >= 0 ); + ijk( idim ) = static_cast( cell.indices( idim ) ) << ( mesh.max_level() - cell.level ); + } + + auto key = _sfc.template getKey( ijk ); + + sfc_keys[ cell ] = key; + + mink = std::min( key, mink ); + maxk = std::max( key, maxk ); + + }); + + // Key boundaries of current process - unused for now + std::vector bounds = { mink, maxk }; + logs << "\t\t> Local key bounds [" << bounds[ 0 ] << ", " << bounds[ 1 ] << "]" << std::endl; + + std::vector boundaries; + boost::mpi::all_gather( world, bounds.data(), static_cast( bounds.size() ), boundaries ); + + logs << "\t\t> Global key boundaries ["; + for(const auto & ik : boundaries ) + logs << ik << ","; + logs << "]" << std::endl; + + // Check overlap with previous/next process. Does not mean that there is no overlap, but at least between "adjacent" + // (MPI-1), (MPI+1) there is not overlap found + std::vector boundaries_new( static_cast( world.size() + 1 ) ); + + // find max value for boundaries + SFC_key_t globalMax = boundaries[ 0 ]; + for(size_t ip=0; ip( world.size() ); + boundaries_new[ 0 ] = 0; + for(size_t ip=1; ip Global key evenly distrib boundaries ["; + for(const auto & ik : boundaries_new ) + logs << ik << ","; + logs << "]" << std::endl; + + // distribute cell based on boundaries & sfc key + std::map comm; + samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ) { + auto key = sfc_keys[ cell ]; + + // optimize using bisect - find proc that should have this cell + for( size_t ip=0; ip< static_cast( world.size() ); ++ip ){ + if( key >= boundaries_new[ ip ] && key < boundaries_new[ ip + 1 ] ){ + flags[ cell ] = static_cast( ip ); + + // unique list of process that should be contacted + if( comm.find( static_cast( ip ) ) == comm.end() ){ + comm[ static_cast( ip ) ] = true; + } + + break; + } + } + + }); + + return flags; + } + + template + auto load_balance_impl( Mesh_t & mesh ) + { + + boost::mpi::communicator world; + + // std::ofstream logs; + // logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app ); + logs << fmt::format("\n# [SFCw_LoadBalancer::Morton] Load balancing cells ") << std::endl; + + // SFC 1D key for each cell + std::map sfc_map; + auto sfc_keys = samurai::make_field( "keys", mesh ); + sfc_keys.fill( 0 ); + + // MPI destination rank of process for each cell + auto flags = samurai::make_field("rank", mesh); + flags.fill( world.rank() ); + + // Computing weights based on maxlevel + // std::vector weights( mesh.max_level() + 1 ); + // for( size_t ilvl=mesh.min_level(); ilvl<=mesh.max_level(); ++ilvl ){ + // weights[ ilvl ] = 1 << ( ( ilvl - mesh.min_level() ) ) ; + // // weights[ ilvl ] = 1 << ( mesh.max_level() - ilvl ) ; + // // weights[ ilvl ] = 1.; + // logs << fmt::format("\t\t> Level {}, weight for cell : {}", ilvl, weights[ ilvl ] ) << std::endl; + // } + + std::vector weights = getWeights( mesh.min_level(), mesh.max_level() ); + + + logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl; + + // SFC_key_t mink = std::numeric_limits::max(), maxk = std::numeric_limits::min(); + + // compute SFC key for each cell + samurai::for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto & cell ) { + + // this is where things can get nasty, we expect indices to be positive values !! + xt::xtensor_fixed> ijk; + for (size_t idim = 0; idim < Mesh_t::dim; ++idim) { + + // FIX need shift to get only positive index + assert( cell.indices( idim ) >= 0 ); + ijk( idim ) = static_cast( cell.indices( idim ) ) << ( mesh.max_level() - cell.level ); + } + + auto key = _sfc.template getKey( ijk ); + + sfc_keys[ cell ] = key; + + if( sfc_map.find( key ) != sfc_map.end() ) { + assert( false ); + std::cerr << fmt::format("Rank # {}, Error computing SFC, index not uniq ! ", world.rank()) << std::endl; + } + + sfc_map[ key ] = cell; + + // mink = std::min( mink, key ); + // maxk = std::max( maxk, key ); + + }); + + assert( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) == sfc_map.size() ); + + size_t nload_tot = 0, dc = 0; + std::vector nbLoadPerProc; + std::vector globIdx( static_cast( world.size() + 1 ) ); + std::vector globIdxNew( static_cast( world.size() + 1 ) ); + + // get load for each MPI process + size_t load = 0; + samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ){ + load += weights[ cell.level ]; + }); + + boost::mpi::all_gather( world, load, nbLoadPerProc ); + + for(size_t i=0; i( world.size() ); ++i){ + globIdx[ i + 1 ] = globIdx[ i ] + nbLoadPerProc[ i ]; + nload_tot += nbLoadPerProc[ i ]; + } + dc = nload_tot / static_cast( world.size() ); + + logs << fmt::format("\t\t> Load of cells (weighted) : {}, dc : {}", load, dc) << std::endl; + + // load balanced globIdx -> new theoretical key boundaries based on number of cells per proc + globIdxNew[ 0 ] = 0; + for(size_t i=0; i( world.size() ); ++i){ + globIdxNew[ i + 1 ] = globIdxNew[ i ] + dc; + } + + { + logs << "\t\t> GlobalIdx : "; + for(const auto & i : globIdx ) + logs << i << ", "; + logs << std::endl; + + logs << "\t\t> GlobalIdx balanced : "; + for(const auto & i : globIdxNew ) + logs << i << ", "; + logs << std::endl; + + } + + size_t start = 0; + while( globIdx[ static_cast( world.rank() ) ] >= ( start + 1 ) * dc ){ + start ++; + } + + logs << "\t\t> Start @ rank " << start << std::endl; + + size_t count = globIdx[ static_cast( world.rank() ) ]; + for( auto & it : sfc_map ) { + + if( count >= ( start + 1 ) * dc ){ + start ++; + start = std::min( static_cast( world.size() - 1 ) , start ); + logs << "\t\t> Incrementing Start @ rank " << start << ", count " << count << std::endl; + } + + flags[ it.second ] = static_cast( start ); + + count += weights[ it.second.level ]; + } + + return flags; + } + + }; + +} +#endif diff --git a/include/samurai/load_balancing_utils.hpp b/include/samurai/load_balancing_utils.hpp new file mode 100644 index 000000000..0af4acba3 --- /dev/null +++ b/include/samurai/load_balancing_utils.hpp @@ -0,0 +1,267 @@ +// Copyright 2018-2024 the samurai's authors +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include +#include +#include + +#include "ptscotch.h" + +#include "mesh_interval.hpp" +#include "stencil.hpp" + +using namespace xt::placeholders; + +namespace samurai +{ + namespace detail + { + template + inline void for_each_interface_impl(const Mesh& mesh1, const Mesh& mesh2, Func&& func) + { + constexpr std::size_t dim = Mesh::dim; + using interval_t = typename Mesh::interval_t; + using mesh_interval_t = MeshInterval; + const std::size_t max_level_jump = 1; + + auto directions = cartesian_directions(); + + for (std::size_t l_jump = 0; l_jump <= max_level_jump; ++l_jump) + { + if (mesh1.max_level() - mesh1.min_level() >= l_jump) + { + for (std::size_t level = mesh1.min_level(); level <= mesh1.max_level() - l_jump; ++level) + { + for (std::size_t id = 0; id < directions.shape(0); ++id) + { + auto d = xt::view(directions, id); + auto set = intersection(mesh1[level], translate(mesh2[level + l_jump], d)).on(level + l_jump); + set( + [&](const auto& interval, const auto& index) + { + mesh_interval_t mesh_interval_from{level + l_jump, interval - d(0), index - xt::view(d, xt::range(1, _))}; + mesh_interval_t mesh_interval_to{level, interval >> l_jump, index >> l_jump}; + mesh_interval_from.i.step = (1 << l_jump); + for (int ll = 0; ll < (1 << l_jump); ++ll) + { + if (mesh_interval_from.i.start == mesh_interval_from.i.end) + { + break; + } + func(mesh_interval_from, mesh_interval_to); + mesh_interval_from.i.start++; + } + }); + } + } + } + } + } + } + + template + inline void for_each_inner_interface(Mesh& mesh, Func&& func) + { + using mesh_id_t = typename Mesh::mesh_id_t; + + detail::for_each_interface_impl(mesh[mesh_id_t::cells], mesh[mesh_id_t::cells], std::forward(func)); + } + + template + inline void for_each_subdomain_interface(Mesh& mesh, Func&& func) + { + using mesh_id_t = typename Mesh::mesh_id_t; + + for (auto& neigh : mesh.mpi_neighbourhood()) + { + detail::for_each_interface_impl(mesh[mesh_id_t::cells], neigh.mesh[mesh_id_t::cells], std::forward(func)); + } + } + + template + inline auto get_local_interval_location(const Mesh& mesh, const MeshInterval& mesh_interval) + { + std::size_t level = mesh_interval.level; + auto& i = mesh_interval.i; + auto& index = mesh_interval.index; + std::size_t offset = 0; + for (std::size_t ll = 0; ll < level; ++ll) + { + offset += mesh[ll].nb_intervals(0); + } + return mesh[level].get_interval_location(i, index) + offset; + } + + template + inline auto get_local_start_interval(const Mesh& mesh, const MeshInterval& mesh_interval, const LocalIndex& local_index) + { + return local_index[get_local_interval_location(mesh, mesh_interval)] + mesh_interval.i.start; + } + + template + struct graph + { + std::vector global_index_offset; + std::vector xadj; + std::vector adjncy; + std::vector adjwgt; + std::vector xyz; + }; + + template + auto build_graph(const Mesh_t& all_meshes) + { + constexpr std::size_t dim = Mesh_t::dim; + using mesh_id_t = typename Mesh_t::mesh_id_t; + + // graph g; + graph g; + + boost::mpi::communicator world; + + auto& mesh = all_meshes[mesh_id_t::cells]; + + std::vector global_sizes(world.size()); + mpi::all_gather(world, mesh.nb_cells(), global_sizes); + + g.global_index_offset.resize(world.size() + 1); + for (std::size_t i = 1; i < g.global_index_offset.size(); ++i) + { + g.global_index_offset[i] = g.global_index_offset[i - 1] + global_sizes[i - 1]; + } + + // Construct the local index numbering of the cells + std::vector local_index(mesh.nb_intervals(0)); + std::size_t counter = 0; + std::size_t current_size = 0; + for_each_interval(mesh, + [&](std::size_t level, const auto& i, auto& index) + { + local_index[counter++] = current_size - i.start; + current_size += i.size(); + }); + + // Construct the list adjacent cells for each cell + std::map> adj_cells; + std::map> adj_weights; + std::size_t data_size = 0; + + // interior interface + for_each_inner_interface( + all_meshes, + [&](const auto& mesh_interval_from, const auto& mesh_interval_to) + { + auto start_from = get_local_start_interval(mesh, mesh_interval_from, local_index) + g.global_index_offset[world.rank()]; + auto start_to = get_local_start_interval(mesh, mesh_interval_to, local_index) + g.global_index_offset[world.rank()]; + + // std::cout << "From: " << mesh_interval_from.level << " " << mesh_interval_from.i << " " << mesh_interval_from.index + // << std::endl; + // std::cout << "To: " << mesh_interval_to.level << " " << mesh_interval_to.i << " " << mesh_interval_to.index << std::endl; + auto step = mesh_interval_from.i.step; + for (int i = 0; i < mesh_interval_to.i.size(); ++i) + { + // std::cout << i << " " << step << " " << start_from + i * step << " " << start_to + i << std::endl; + adj_cells[start_from + i * step].push_back(start_to + i); + adj_weights[start_from + i * step].push_back(0.5 * (mesh_interval_from.level + mesh_interval_to.level)); + data_size++; + if (mesh_interval_from.level != mesh_interval_to.level) + { + adj_cells[start_to + i].push_back(start_from + i * step); + adj_weights[start_to + i].push_back(0.5 * (mesh_interval_from.level + mesh_interval_to.level)); + data_size++; + } + } + // std::cout << std::endl; + }); + + std::vector req; + std::vector> to_send(all_meshes.mpi_neighbourhood().size()); + std::set recv_from; + + std::size_t i_neigh = 0; + for (auto& neigh : all_meshes.mpi_neighbourhood()) + { + detail::for_each_interface_impl(mesh, + neigh.mesh[mesh_id_t::cells], + [&](const auto&, const auto& mesh_interval) + { + recv_from.insert(neigh.rank); + auto start = get_local_start_interval(mesh, mesh_interval, local_index) + + g.global_index_offset[world.rank()]; + to_send[i_neigh].push_back(static_cast(start)); + }); + if (recv_from.find(neigh.rank) != recv_from.end()) + { + req.push_back(world.isend(neigh.rank, neigh.rank, to_send[i_neigh++])); + } + } + + for (auto& neigh : all_meshes.mpi_neighbourhood()) + { + if (recv_from.find(neigh.rank) != recv_from.end()) + { + std::vector to_recv; + world.recv(neigh.rank, world.rank(), to_recv); + + std::size_t to_recv_counter = 0; + detail::for_each_interface_impl( + neigh.mesh[mesh_id_t::cells], + mesh, + [&](const auto& mesh_interval, const auto& mesh_interval_to) + { + auto start_1 = get_local_start_interval(mesh, mesh_interval, local_index) + g.global_index_offset[world.rank()]; + auto start_2 = to_recv[to_recv_counter++]; + + for (int i = 0, i1 = start_1, i2 = start_2; i < mesh_interval.i.size(); ++i, ++i1, ++i2) + { + adj_cells[i1].push_back(i2); + adj_weights[start_1 + i].push_back(0.5 * (mesh_interval.level + mesh_interval_to.level)); + data_size += 1; + } + }); + } + } + + mpi::wait_all(req.begin(), req.end()); + + // Construct the adjacency list in CSR format + g.xadj.resize(mesh.nb_cells() + 1); + + g.xadj[0] = 0; + std::size_t i_xadj = 0; + for (auto& v : adj_cells) + { + g.xadj[i_xadj + 1] = g.xadj[i_xadj] + v.second.size(); + ++i_xadj; + } + + g.adjncy.resize(g.xadj.back()); + g.adjwgt.resize(g.xadj.back()); + i_xadj = 0; + for (auto& v : adj_cells) + { + std::copy(v.second.begin(), v.second.end(), g.adjncy.begin() + g.xadj[i_xadj]); + ++i_xadj; + } + + i_xadj = 0; + for (auto& v : adj_weights) + { + std::copy(v.second.begin(), v.second.end(), g.adjwgt.begin() + g.xadj[i_xadj]); + ++i_xadj; + } + + std::cout << data_size << " " << g.xadj.back() << std::endl; + // assert(data_size == g.xadj.back()); + g.xyz.reserve(mesh.nb_cells() * dim); + for_each_cell(mesh, + [&](const auto& cell) + { + auto center = cell.center(); + std::copy(center.begin(), center.end(), std::back_inserter(g.xyz)); + }); + return g; + } +} diff --git a/include/samurai/load_balancing_void.hpp b/include/samurai/load_balancing_void.hpp new file mode 100644 index 000000000..06f896bb8 --- /dev/null +++ b/include/samurai/load_balancing_void.hpp @@ -0,0 +1,52 @@ +/** + * Empty class, used for test to compare with and without load balancing. + * +*/ + +#pragma once + +#include +#include "load_balancing.hpp" + +#ifdef SAMURAI_WITH_MPI +template +class Void_LoadBalancer: public samurai::LoadBalancer> { + + private: + int _ndomains; + int _rank; + + public: + + Void_LoadBalancer() { +#ifdef SAMURAI_WITH_MPI + boost::mpi::communicator world; + _ndomains = world.size(); + _rank = world.rank(); +#else + _ndomains = 1; + _rank = 0; +#endif + } + + inline std::string getName() const { return "Void_LB"; } + + template + bool require_balance_impl( [[maybe_unused]] Mesh_t & mesh ) { return false; } + + template + auto reordering_impl( Mesh_t & mesh ) { + auto flags = samurai::make_field("balancing_flags", mesh); + flags.fill( _rank ); + return flags; + } + + template + auto load_balance_impl( Mesh_t & mesh ){ + auto flags = samurai::make_field("balancing_flags", mesh); + flags.fill( _rank ); + return flags; + } + +}; +#endif diff --git a/include/samurai/mesh.hpp b/include/samurai/mesh.hpp index 4f8387551..207661b6b 100644 --- a/include/samurai/mesh.hpp +++ b/include/samurai/mesh.hpp @@ -104,6 +104,8 @@ namespace samurai bool is_periodic(std::size_t d) const; const std::array& periodicity() const; // std::vector& neighbouring_ranks(); + + const std::vector& mpi_neighbourhood() const; std::vector& mpi_neighbourhood(); void swap(Mesh_base& mesh) noexcept; @@ -132,12 +134,16 @@ namespace samurai void update_mesh_neighbour(); void to_stream(std::ostream& os) const; + void merge(ca_type& lca); + void remove(ca_type& lca); + protected: using derived_type = D; Mesh_base() = default; // cppcheck-suppress uninitMemberVar Mesh_base(const cl_type& cl, const self_type& ref_mesh); + Mesh_base(const ca_type& ca, const self_type& ref_mesh); Mesh_base(const cl_type& cl, std::size_t min_level, std::size_t max_level); Mesh_base(const samurai::Box& b, std::size_t start_level, @@ -153,6 +159,9 @@ namespace samurai double approx_box_tol = lca_type::default_approx_box_tol, double scaling_factor = 0); + // Used for load balancing + Mesh_base(const cl_type& cl, std::size_t min_level, std::size_t max_level, std::vector& neighbourhood); + derived_type& derived_cast() & noexcept; const derived_type& derived_cast() const& noexcept; derived_type derived_cast() && noexcept; @@ -194,7 +203,7 @@ namespace samurai ar& m_subdomain; ar& m_union; ar& m_min_level; - ar& m_min_level; + ar& m_max_level; } #endif }; @@ -233,7 +242,6 @@ namespace samurai #ifdef SAMURAI_WITH_MPI partition_mesh(start_level, b); - // load_balancing(); #else this->m_cells[mesh_id_t::cells][start_level] = {start_level, b, approx_box_tol, scaling_factor_}; #endif @@ -277,6 +285,8 @@ namespace samurai set_origin_point(origin_point()); set_scaling_factor(scaling_factor()); + update_mesh_neighbour(); + } template @@ -292,7 +302,7 @@ namespace samurai construct_subdomain(); m_domain = m_subdomain; construct_union(); - update_sub_mesh(); + update_sub_mesh(); // MPI AllReduce inside renumbering(); update_mesh_neighbour(); @@ -321,6 +331,48 @@ namespace samurai set_scaling_factor(ref_mesh.scaling_factor()); } + template + inline Mesh_base::Mesh_base(const ca_type& ca, const self_type& ref_mesh) + : m_domain(ref_mesh.m_domain) + , m_min_level(ref_mesh.m_min_level) + , m_max_level(ref_mesh.m_max_level) + , m_periodic(ref_mesh.m_periodic) + , m_mpi_neighbourhood(ref_mesh.m_mpi_neighbourhood) + + { + m_cells[mesh_id_t::cells] = ca; + + construct_subdomain(); + construct_union(); + update_sub_mesh(); + renumbering(); + update_mesh_neighbour(); + } + + template + inline Mesh_base::Mesh_base(const cl_type& cl, + std::size_t min_level, + std::size_t max_level, + std::vector& neighbourhood) + : m_min_level(min_level) + , m_max_level(max_level) + , m_mpi_neighbourhood(neighbourhood) + { + m_periodic.fill(false); + assert(min_level <= max_level); + + // what to do with m_domain ? + m_domain = m_subdomain; + + m_cells[mesh_id_t::cells] = {cl, false}; + + construct_subdomain(); // required ? + construct_union(); // required ? + update_sub_mesh(); // perform MPI allReduce calls + renumbering(); // required ? + update_mesh_neighbour(); // required to do that here ?? + } + template inline auto Mesh_base::cells() -> mesh_t& { @@ -506,6 +558,12 @@ namespace samurai return m_periodic; } + template + inline auto Mesh_base::mpi_neighbourhood() const -> const std::vector& + { + return m_mpi_neighbourhood; + } + template inline auto Mesh_base::mpi_neighbourhood() -> std::vector& { @@ -667,7 +725,8 @@ namespace samurai int product_of_sizes = 1; for (std::size_t d = 0; d < dim - 1; ++d) { - sizes[d] = static_cast(floor(pow(size, 1. / dim) * global_box.length()[d] / length_harmonic_avg)); + sizes[d] = std::max(static_cast(floor(pow(size, 1. / dim) * global_box.length()[d] / length_harmonic_avg)), 1); + product_of_sizes *= sizes[d]; } sizes[dim - 1] = size / product_of_sizes; @@ -715,6 +774,8 @@ namespace samurai this->m_cells[mesh_id_t::cells][start_level] = {start_level, subdomain_box}; */ + /** delete for git rebase + **/ lcl_type subdomain_cells(start_level, m_domain.origin_point(), m_domain.scaling_factor()); auto subdomain_nb_intervals = m_domain.nb_intervals() / static_cast(size); auto subdomain_start = static_cast(rank) * subdomain_nb_intervals; @@ -732,6 +793,9 @@ namespace samurai }); this->m_cells[mesh_id_t::cells][start_level] = subdomain_cells; + // end comment for git rebase + // this->m_cells[mesh_id_t::cells][start_level] = {start_level, subdomain_box}; + m_mpi_neighbourhood.reserve(static_cast(size) - 1); for (int ir = 0; ir < size; ++ir) @@ -746,110 +810,81 @@ namespace samurai // m_mpi_neighbourhood.reserve(static_cast(pow(3, dim) - 1)); // auto neighbour = [&](xt::xtensor_fixed> shift) // { - // auto neighbour_rank = rank; - // int product_of_preceding_sizes = 1; - // for (std::size_t d = 0; d < dim; ++d) - // { - // neighbour_rank += product_of_preceding_sizes * shift[d]; - // product_of_preceding_sizes *= sizes[d]; - // } - // return neighbour_rank; - // }; - - // static_nested_loop( - // [&](auto& shift) - // { - // if (xt::any(shift)) - // { - // for (std::size_t d = 0; d < dim; ++d) - // { - // if (coords[d] + shift[d] < 0 || coords[d] + shift[d] >= sizes[d]) - // { - // return; - // } - // } - // m_mpi_neighbourhood.push_back(neighbour(shift)); - // } - // }); - + // auto neighbour_rank = rank; + // int product_of_preceding_sizes = 1; + // for (std::size_t d = 0; d < dim; ++d) + // { + // neighbour_rank += product_of_preceding_sizes * shift[d]; + // product_of_preceding_sizes *= sizes[d]; + // } + // return neighbour_rank; + //}; + + //static_nested_loop( + // [&](auto& shift) + // { + // if (xt::any(shift)) + // { + // for (std::size_t d = 0; d < dim; ++d) + // { + // if (coords[d] + shift[d] < 0 || coords[d] + shift[d] >= sizes[d]) + // { + // return; + // } + // } + // m_mpi_neighbourhood.push_back(neighbour(shift)); + // } + // }); #endif } template - void Mesh_base::load_balancing() + void Mesh_base::merge(ca_type& lca) { -#ifdef SAMURAI_WITH_MPI - mpi::communicator world; - auto rank = world.rank(); + // merge received cells - std::size_t load = nb_cells(mesh_id_t::cells); - std::vector loads; + auto& refmesh = this->m_cells[mesh_id_t::cells]; - std::vector load_fluxes(m_mpi_neighbourhood.size(), 0); + auto minlevel = std::min(refmesh.min_level(), lca.min_level()); + auto maxlevel = std::max(refmesh.max_level(), lca.max_level()); - const std::size_t n_iterations = 1; - - for (std::size_t k = 0; k < n_iterations; ++k) + cl_type cl; + for (size_t ilvl = minlevel; ilvl <= maxlevel; ++ilvl) { - world.barrier(); - if (rank == 0) - { - std::cout << "---------------- k = " << k << " ----------------" << std::endl; - } - mpi::all_gather(world, load, loads); - - std::vector nb_neighbours; - mpi::all_gather(world, m_mpi_neighbourhood.size(), nb_neighbours); + auto un = samurai::union_(refmesh[ilvl], lca[ilvl]); - double load_np1 = static_cast(load); - for (std::size_t i_rank = 0; i_rank < m_mpi_neighbourhood.size(); ++i_rank) - { - auto neighbour = m_mpi_neighbourhood[i_rank]; - - auto neighbour_load = loads[static_cast(neighbour.rank)]; - int neighbour_load_minus_my_load; - if (load < neighbour_load) + un( + [&](auto& interval, auto& indices) { - neighbour_load_minus_my_load = static_cast(neighbour_load - load); - } - else - { - neighbour_load_minus_my_load = -static_cast(load - neighbour_load); - } - double weight = 1. / std::max(m_mpi_neighbourhood.size(), nb_neighbours[neighbour.rank]); - load_fluxes[i_rank] = weight * neighbour_load_minus_my_load; - load_np1 += load_fluxes[i_rank]; - } - load_np1 = floor(load_np1); - - load_transfer(load_fluxes); - - std::cout << rank << ": load = " << load << ", load_np1 = " << load_np1 << std::endl; - - load = static_cast(load_np1); + cl[ilvl][indices].add_interval(interval); + }); } -#endif + + refmesh = {cl, false}; } template - void Mesh_base::load_transfer([[maybe_unused]] const std::vector& load_fluxes) + void Mesh_base::remove(ca_type& lca) { -#ifdef SAMURAI_WITH_MPI - mpi::communicator world; - std::cout << world.rank() << ": "; - for (std::size_t i_rank = 0; i_rank < m_mpi_neighbourhood.size(); ++i_rank) + auto& refmesh = this->m_cells[mesh_id_t::cells]; + + // remove cells + cl_type cl; + size_t diff_ncells = 0; + for (size_t ilvl = refmesh.min_level(); ilvl <= refmesh.max_level(); ++ilvl) { - auto neighbour = m_mpi_neighbourhood[i_rank]; - if (load_fluxes[i_rank] < 0) // must tranfer load to the neighbour - { - } - else if (load_fluxes[i_rank] > 0) // must receive load from the neighbour - { - } - std::cout << "--> " << neighbour.rank << ": " << load_fluxes[i_rank] << ", "; + auto diff = samurai::difference(refmesh[ilvl], lca[ilvl]); + + diff( + [&](auto& interval, auto& index) + { + cl[ilvl][index].add_interval(interval); + diff_ncells += interval.size(); + }); } - std::cout << std::endl; -#endif + + // new mesh for current process + refmesh = {cl, false}; } template diff --git a/include/samurai/morton.hpp b/include/samurai/morton.hpp new file mode 100644 index 000000000..da29d3d79 --- /dev/null +++ b/include/samurai/morton.hpp @@ -0,0 +1,155 @@ +#pragma once + +#include +#include + +#include "sfc.hpp" + +class Morton : public SFC { + + private: + + /* + * Split bit by 3, version for 2D, used to compute morton index + * + * Parameters: + * logical_a : coordonnées logiques (i ou j) + * + * return : + * x : unsigned 64bits with splited by 3 + */ + inline SFC_key_t splitBy3_2D( uint32_t logical_a ) const { + + SFC_key_t x = logical_a & 0xffffffff; + x = (x | x << 16) & 0xffff0000ffff; + x = (x | x << 8) & 0xff00ff00ff00ff; + x = (x | x << 4) & 0xf0f0f0f0f0f0f0f; + x = (x | x << 2) & 0x3333333333333333; + x = (x | x << 1) & 0x5555555555555555; + + return x; + } + + /* + * Split bit by 3, version for 3D, used to compute morton index + * + * Parameters: + * logical_a : coordonnées logiques (i ou j ou k) + * + * return : + * x : unsigned 64bits with splited by 3 + */ + inline SFC_key_t splitBy3_3D(uint32_t logical_a) const { + + SFC_key_t x = logical_a & 0x1fffff; + + x = (x | x << 32) & 0x1f00000000ffff; + x = (x | x << 16) & 0x1f0000ff0000ff; + x = (x | x << 8) & 0x100f00f00f00f00f; + x = (x | x << 4) & 0x10c30c30c30c30c3; + x = (x | x << 2) & 0x1249249249249249; + + return x; + + } + + + public: + + inline std::string getName() const { return "Morton"; } + + /* + * Return the morton index from logical coordinate (i,j) or (i,j,k) + * + * Parameters: + * lc : structure containing logical coordinate + * + * Return : + * key : morton key + */ + template + inline SFC_key_t getKey_2D_impl( const Coord_t & lc ) const { + SFC_key_t la_clef = 0; + la_clef |= splitBy3_2D( lc( 0 ) ) | splitBy3_2D( lc( 1 ) ) << 1; + return la_clef; + } + + template + inline SFC_key_t getKey_3D_impl( const Coord_t & lc ) const { + SFC_key_t la_clef = 0; + la_clef |= splitBy3_3D( lc( 0 ) ) | splitBy3_3D( lc( 1 ) ) << 1 | splitBy3_3D( lc( 2 ) ) << 2; + return la_clef; + } + + /* + * Return the logical coordinate (i,j) or (i,j,k) from the morton index + * + * Parameters: + * clef (in): morton index + * + * Return : + * lc (out): 2D/3D logical coordinates + */ + inline auto getCoordinates_2D_impl( const SFC_key_t & clef ) const { + + xt::xtensor_fixed> lc = { 0, 0 }; + + // Extract coord i + SFC_key_t keyi = clef >> 0; + keyi &= 0x5555555555555555; + keyi = (keyi ^ (keyi >> 1)) & 0x3333333333333333; + keyi = (keyi ^ (keyi >> 2)) & 0x0f0f0f0f0f0f0f0f; + keyi = (keyi ^ (keyi >> 4)) & 0x00ff00ff00ff00ff; + keyi = (keyi ^ (keyi >> 8)) & 0x0000ffff0000ffff; + keyi = (keyi ^ (keyi >> 16)) & 0x00000000ffffffff; + lc( 0 ) = static_cast( keyi ); + + // extract coord j + SFC_key_t keyj = clef >> 1; + keyj &= 0x5555555555555555; + keyj = (keyj ^ (keyj >> 1)) & 0x3333333333333333; + keyj = (keyj ^ (keyj >> 2)) & 0x0f0f0f0f0f0f0f0f; + keyj = (keyj ^ (keyj >> 4)) & 0x00ff00ff00ff00ff; + keyj = (keyj ^ (keyj >> 8)) & 0x0000ffff0000ffff; + keyj = (keyj ^ (keyj >> 16)) & 0x00000000ffffffff; + lc( 1 ) = static_cast( keyj ); + + return lc; + } + + inline auto getCoordinates_3D_impl( const SFC_key_t & clef ) const { + + xt::xtensor_fixed> lc = { 0, 0, 0 }; + + // Extract coord i + SFC_key_t keyi = clef >> 0; + keyi &= 0x1249249249249249; + keyi = (keyi ^ (keyi >> 2)) & 0x30c30c30c30c30c3; + keyi = (keyi ^ (keyi >> 4)) & 0xf00f00f00f00f00f; + keyi = (keyi ^ (keyi >> 8)) & 0x00ff0000ff0000ff; + keyi = (keyi ^ (keyi >> 16)) & 0x00ff00000000ffff; + keyi = (keyi ^ (keyi >> 32)) & 0x1fffff; + lc( 0 ) = static_cast( keyi ); // assert for overflow ? + + SFC_key_t keyj = clef >> 1; + keyj &= 0x1249249249249249; + keyj = (keyj ^ (keyj >> 2)) & 0x30c30c30c30c30c3; + keyj = (keyj ^ (keyj >> 4)) & 0xf00f00f00f00f00f; + keyj = (keyj ^ (keyj >> 8)) & 0x00ff0000ff0000ff; + keyj = (keyj ^ (keyj >> 16)) & 0x00ff00000000ffff; + keyj = (keyj ^ (keyj >> 32)) & 0x1fffff; + lc( 1 ) = static_cast( keyj ); + + SFC_key_t keyk = clef >> 2; + keyk &= 0x1249249249249249; + keyk = (keyk ^ (keyk >> 2)) & 0x30c30c30c30c30c3; + keyk = (keyk ^ (keyk >> 4)) & 0xf00f00f00f00f00f; + keyk = (keyk ^ (keyk >> 8)) & 0x00ff0000ff0000ff; + keyk = (keyk ^ (keyk >> 16)) & 0x00ff00000000ffff; + keyk = (keyk ^ (keyk >> 32)) & 0x1fffff; + lc( 2 ) = static_cast( keyk ); + + return lc; + } + +}; diff --git a/include/samurai/mr/mesh.hpp b/include/samurai/mr/mesh.hpp index 2ab62fbb4..54a6d633f 100644 --- a/include/samurai/mr/mesh.hpp +++ b/include/samurai/mr/mesh.hpp @@ -73,6 +73,7 @@ namespace samurai MRMesh() = default; MRMesh(const cl_type& cl, const self_type& ref_mesh); + MRMesh(const ca_type& ca, const self_type& ref_mesh); MRMesh(const cl_type& cl, std::size_t min_level, std::size_t max_level); MRMesh(const samurai::Box& b, std::size_t min_level, @@ -85,6 +86,13 @@ namespace samurai const std::array& periodic, double approx_box_tol = lca_type::default_approx_box_tol, double scaling_factor = 0); + // Used for load balancing, finally commented + //MRMesh(const cl_type & cl, + // std::size_t min_level, + // std::size_t max_level, + // std::vector & neighbourhood, + // double approx_box_tol = lca_type::default_approx_box_tol, + // double scaling_factor = 0); void update_sub_mesh_impl(); @@ -98,6 +106,12 @@ namespace samurai { } + template + inline MRMesh::MRMesh(const ca_type& ca, const self_type& ref_mesh) + : base_type(ca, ref_mesh) + { + } + template inline MRMesh::MRMesh(const cl_type& cl, std::size_t min_level, std::size_t max_level) : base_type(cl, min_level, max_level) @@ -125,6 +139,13 @@ namespace samurai { } + // template + // inline MRMesh::MRMesh( const cl_type & cl, std::size_t min_level, std::size_t max_level, + // std::vector & neighbourhood) + // : base_type(cl, min_level, max_level, neighbourhood ) + // { + // } + template inline void MRMesh::update_sub_mesh_impl() { diff --git a/include/samurai/samurai.hpp b/include/samurai/samurai.hpp index f1a449d6f..bb04345af 100644 --- a/include/samurai/samurai.hpp +++ b/include/samurai/samurai.hpp @@ -31,6 +31,7 @@ namespace samurai #ifdef SAMURAI_WITH_MPI MPI_Init(&argc, &argv); + boost::mpi::communicator world; #endif times::timers.start("total runtime"); return app; @@ -46,6 +47,7 @@ namespace samurai #ifdef SAMURAI_WITH_MPI MPI_Init(nullptr, nullptr); #endif + times::timers.start("smr::total_runtime"); } inline void finalize() diff --git a/include/samurai/sfc.hpp b/include/samurai/sfc.hpp new file mode 100644 index 000000000..cd2561b28 --- /dev/null +++ b/include/samurai/sfc.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include + +#include "assertLogTrace.hpp" + +using SFC_key_t = uint64_t; + +template +class SFC { + + public: + + /** + * + * Interface to compute Space Filling Curve 1D index from + * 2D/3D logical coordinates of cell. + * + */ + template + inline SFC_key_t getKey( const Coord_t & lc ) const{ + if constexpr ( dim == 2 ) return static_cast( this )->getKey_2D_impl( lc ); + if constexpr ( dim == 3 ) return static_cast( this )->getKey_3D_impl( lc ); + } + + /** + * + * Interface to compute logical coordinate from Space Filling Curve 1D index. + * + */ + template + inline auto getCoordinates( const SFC_key_t & clef ) const{ + if constexpr ( dim == 2 ) return static_cast( this )->getCoordinates_2D_impl( clef ); + if constexpr ( dim == 3 ) return static_cast( this )->getCoordinates_3D_impl( clef ); + } + + inline auto getName() const -> std::string { + return static_cast( this )->getName(); + } +}; \ No newline at end of file diff --git a/include/samurai/statistics.hpp b/include/samurai/statistics.hpp index 14a674ddb..4e58aa676 100644 --- a/include/samurai/statistics.hpp +++ b/include/samurai/statistics.hpp @@ -14,16 +14,18 @@ namespace samurai #if defined(WITH_STATS) struct Statistics { - Statistics(const std::string& filename, int save_all = 10) - : filename(filename) - , save_all(save_all) + Statistics( const std::string & filename, std::size_t save_all = 10) + : _outfile( filename ) , icurrent(0) + , save_all(save_all) { } template - void operator()(std::string test_case, const Mesh& mesh) + void operator()( std::string test_case, Mesh& mesh ) { + + icurrent++; using mesh_id_t = typename Mesh::mesh_id_t; auto ca = mesh[mesh_id_t::cells]; @@ -32,6 +34,10 @@ namespace samurai std::size_t min_level = ca.min_level(); std::size_t max_level = ca.max_level(); + size_t n_neighbours = mesh.mpi_neighbourhood().size(); + size_t n_cells_l = mesh.nb_cells( Mesh::mesh_id_t::cells ); + size_t n_cells_g = mesh.nb_cells( Mesh::mesh_id_t::reference ) - mesh.nb_cells( Mesh::mesh_id_t::cells ); + auto comp = [](const auto& a, const auto& b) { return a.size() < b.size(); @@ -72,6 +78,9 @@ namespace samurai stats[test_case].push_back({ {"min_level", min_level}, {"max_level", max_level}, + {"n_neighbours", n_neighbours}, + {"n_cells_g", n_cells_g}, // ghost ? + {"n_cells_l", n_cells_l}, // leaves {"by_level", by_level } }); } @@ -81,6 +90,9 @@ namespace samurai out.push_back({ {"min_level", min_level}, {"max_level", max_level}, + {"n_neighbours", n_neighbours}, + {"n_cells_g", n_cells_g}, // ghost ? + {"n_cells_l", n_cells_l}, // leaves {"by_level", by_level } }); stats[test_case] = out; @@ -88,7 +100,7 @@ namespace samurai if (icurrent == save_all) { - std::ofstream ofile(filename); + std::ofstream ofile( _outfile ); ofile << std::setw(4) << stats << std::endl; icurrent = 0; } @@ -96,17 +108,16 @@ namespace samurai ~Statistics() { - std::ofstream file(filename); + std::ofstream file( _outfile ); file << std::setw(4) << stats; } - std::string filename; + std::string _outfile; json stats; std::size_t icurrent; - int save_all; + std::size_t save_all; }; - auto statistics = Statistics("stats.json"); #else template void statistics(const std::string& test_case, const Mesh& mesh){}; diff --git a/include/samurai/timers.hpp b/include/samurai/timers.hpp index e3ab002a5..90b764e5a 100644 --- a/include/samurai/timers.hpp +++ b/include/samurai/timers.hpp @@ -297,3 +297,4 @@ namespace samurai } } + diff --git a/python/read_stats.py b/python/read_stats.py index adb4f9193..1f13c2337 100644 --- a/python/read_stats.py +++ b/python/read_stats.py @@ -1,15 +1,85 @@ + +# +# +# categories = ['A', 'B', 'C', 'D'] +# valeurs_moyennes = [3, 7, 5, 9] +# valeurs_min = [2, 5, 4, 8] +# valeurs_max = [4, 9, 6, 10] + +# # Création du graphique +# plt.figure(figsize=(10, 6)) + +# # Tracé des barres +# plt.bar(categories, valeurs_moyennes, yerr=[(v_min, v_max) for v_min, v_max in zip(valeurs_min, valeurs_max)], capsize=5) + +import numpy +import argparse import pandas as pd import matplotlib.pyplot as plt -test_case = "D2Q4444_Euler_Lax_Liu" +# arguments parser +CLI = argparse.ArgumentParser() + +# list of PDF files +CLI.add_argument("--file", type=str, default=[""], help="stats file") -data = pd.read_json('stats.json') +# parse the command line +args = CLI.parse_args() + +test_case = "stats" + +data = pd.read_json( args.file ) data = pd.json_normalize(data[test_case]) +print(type(print(data.min_level))) +print(data.min_level) + min_level = data.min_level.min() max_level = data.max_level.max() levels = range(min_level, max_level+1) +print("> Levels: {}".format(levels)) + +def plot_minmax( suffix, title, xlabel, ylabel, ax, level_range, kind='box', legend=None): + """ + level_range (in) : range of levels (min_level, max_level) + """ + print("\t> [plot_minmax] Entering .. ") + # keys in dict. of data + _tmp_fields = [] + _tmp_name = [] + _minmaxave = {} + for elem in ["min", "max", "ave"]: + _fields = [f'by_level.{l:02}.{suffix}.{elem}' for l in level_range] + + _minmaxave[elem] = data[ _fields ].to_numpy()[0] + + _tmp_fields.append( _fields ) + _tmp_name.append( {f: l for f, l in zip(_fields, levels)} ) + + # ax.set_xlabel(xlabel) + # ax.set_ylabel(ylabel) + ax.set_xlim( level_range.start-1, level_range.stop ) + ax.set_title(title, fontweight="bold", fontsize=10) + + x = numpy.arange(min_level, max_level+1) + spacing = 0.3 # spacing between hat groups + width = (1 - spacing) / x.shape[0] + ax.set_xticks( x, labels=x ) + + ax.errorbar( x, _minmaxave["ave"], yerr=[_minmaxave["ave"]-_minmaxave["min"], _minmaxave["max"]-_minmaxave["ave"]], \ + ls='None', marker='o', ms=6, capsize=5, c="b" ) + + # heights0 = values[0] + # for i, (heights, group_label) in enumerate(zip(values, group_labels)): + # style = {'fill': False} if i == 0 else {'edgecolor': 'black'} + # rects = ax.bar(x - spacing/2 + i * width, heights - heights0, + # width, bottom=heights0, label=group_label, **style) + # label_bars(heights, rects) + + # print(to_plot) + # print(type(to_plot)) + def plot(suffix, title, xlabel, ylabel, ax, kind='box', legend=None, stacked=True): fields = [f'by_level.{l:02}.{suffix}' for l in levels] new_name = {f: l for f, l in zip(fields, levels)} @@ -18,20 +88,35 @@ def plot(suffix, title, xlabel, ylabel, ax, kind='box', legend=None, stacked=Tru to_plot.plot(kind=kind, ax=ax, stacked=stacked) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) - ax.set_title(title, fontweight="bold") + ax.set_title(title, fontweight="bold", fontsize=10) if legend: ax.legend(title=legend) nrow, ncol = 3, 4 fig = plt.figure(figsize=(5*ncol, 5*nrow)) -plot("cells", "Number of cells per level", "Time iteration", "number of cells", fig.add_subplot(nrow, 2, 1), kind='area', legend="Level") +plot("cells", "Number of cells per level", "Time iteration", "number of cells", fig.add_subplot(nrow, 2, 1), kind="box", legend="Level") + +plot("axis-0.number of intervals", "Number of intervals per level in x-axis", "Time iteration", \ + "number of intervals", fig.add_subplot(nrow, 2, 3), kind='box', legend="Level", stacked=False) + +plot("axis-1.number of intervals", "Number of intervals per level in y-axis", "Time iteration", \ + "number of intervals", fig.add_subplot(nrow, 2, 5), kind='box', legend="Level", stacked=False) + +# plot min/max numbers of cells per level +# plot("axis-0.cells per interval.min", "Minimum of cells per interval\n in x-axis", "level", \ +# "number of cells", fig.add_subplot(nrow, ncol, 3)) + +# plot("axis-0.cells per interval.max", "Maximum of cells per interval\n in x-axis", "level", \ +# "number of cells", fig.add_subplot(nrow, ncol, 4)) -plot("axis-0.number of intervals", "Number of intervals per level in x-axis", "Time iteration", "number of intervals", fig.add_subplot(nrow, 2, 3), kind='area', legend="Level", stacked=False) -plot("axis-1.number of intervals", "Number of intervals per level in y-axis", "Time iteration", "number of intervals", fig.add_subplot(nrow, 2, 5), kind='area', legend="Level", stacked=False) +ax = fig.add_subplot(nrow, ncol, 3) +plot_minmax("axis-0.cells per interval", "[Min, Max, Ave] Cells per interval in x-axis", "Level", \ + "number of cells", fig.add_subplot(nrow, ncol, 3), level_range=levels, kind='box', legend="Level") +plot_minmax("axis-1.cells per interval", "[Min, Max, Ave] Cells per interval in y-axis", "Level", \ + "number of cells", fig.add_subplot(nrow, ncol, 4), level_range=levels, kind='box', legend="Level") +# ax.bar(numpy.arange(min_level, max_level+1), valeurs_moyennes, yerr=[(v_min, v_max) for v_min, v_max in zip(valeurs_min, valeurs_max)], capsize=5) -plot("axis-0.cells per interval.min", "Minimum of cells per interval\n in x-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 3)) -plot("axis-0.cells per interval.max", "Maximum of cells per interval\n in x-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 4)) plot("axis-1.cells per interval.min", "Minimum of cells per interval\n in y-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 7)) plot("axis-1.cells per interval.max", "Maximum of cells per interval\n in y-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 8)) diff --git a/python/stats_mpi.py b/python/stats_mpi.py new file mode 100644 index 000000000..4f3337db6 --- /dev/null +++ b/python/stats_mpi.py @@ -0,0 +1,160 @@ + +# +# +# categories = ['A', 'B', 'C', 'D'] +# valeurs_moyennes = [3, 7, 5, 9] +# valeurs_min = [2, 5, 4, 8] +# valeurs_max = [4, 9, 6, 10] + +# # Création du graphique +# plt.figure(figsize=(10, 6)) + +# # Tracé des barres +# plt.bar(categories, valeurs_moyennes, yerr=[(v_min, v_max) for v_min, v_max in zip(valeurs_min, valeurs_max)], capsize=5) + +import numpy +import re +import argparse +import pandas as pd +import matplotlib.pyplot as plt + +def plot_mma( data, suffix, ax, xlabel, ylabel, keys=None ): + """ + """ + + # select specific domaine if not None + if keys == None: + keys = data.keys() + + print("plot data for domaine {}".format(keys)) + + for mprank in keys: + levels = range( data[ mprank ].min_level.min(), data[ mprank ].max_level.max()+1 ) + + for elem in ["min", "max", "ave"]: + fields = [f'by_level.{l:02}.{suffix}.{elem}' for l in levels] + print("\t>[plot] Getting field: '{}'".format(fields)) + + # x = numpy.arange( levels.start, levels.stop ) + # y = data[ mprank ][ fields ].to_numpy()[0] + + # ax.plot( x, y, ls='None', markersize=5, marker='o' ) + + ax.set_xlabel( xlabel, fontsize=10 ) + ax.set_ylabel( ylabel, fontsize=10 ) + +def plot_by_level( data, suffix, ax, xlabel, ylabel, keys=None ): + """ + Plot data level by level + """ + + # select specific domaine if not None + if keys == None: + keys = data.keys() + + print("plot data for domaine {}".format(keys)) + + for mprank in keys: + levels = range( data[ mprank ].min_level.min(), data[ mprank ].max_level.max()+1 ) + fields = [f'by_level.{l:02}.{suffix}' for l in levels] + + x = numpy.arange( levels.start, levels.stop ) + y = data[ mprank ][ fields ].to_numpy()[0] + + ax.plot( x, y, ls='None', markersize=5, marker='o' ) + + ax.set_xlabel( xlabel, fontsize=10 ) + ax.set_ylabel( ylabel, fontsize=10 ) + + +def plot_single_quantity( data, suffix, ax, xlabel, ylabel, keys=None ): + """ + Plot data level by level + """ + + # select specific domaine if not None + if keys == None: + keys = list(data.keys()) + + rmin = int( min(keys) ) + rmax = int( max(keys) ) + + x = numpy.zeros( len(keys) ) + y = numpy.zeros( x.shape ) + + for id in range(0, len(keys)): + mprank = int( keys[ id ] ) + x[ id ] = mprank + y[ id ] = data[ str(mprank) ][suffix].values[0] + print("\t>[plot] Process # {}, data single: '{}'".format( mprank, data[ str(mprank)][suffix].values )) + + ax.plot( x, y, ls='None', markersize=5, marker='o' ) + + ax.set_xlabel( xlabel, fontsize=10 ) + ax.set_ylabel( ylabel, fontsize=10 ) + +# arguments parser +CLI = argparse.ArgumentParser() +CLI.add_argument("--files", nargs="*", type=str, default=[""], help="stats file") +CLI.add_argument("--jtag", type=str, default="stats", help="json tag") + +# parse the command line +args = CLI.parse_args() + +min_level = 99 +max_level = 0 +process_stats = {} +for fin in args.files: + + print("\t> Parsing file: '{}'".format( fin )) + fdata = pd.read_json( fin ) + fdata = pd.json_normalize( fdata[ args.jtag ] ) + + match = re.search(r'process_(\d+)', fin) + if match: + rank = match.group(1) + else: + rank = 0 + + # compute min/max level overall datasets + min_level = min( min_level, fdata.min_level.min() ) + max_level = max( max_level, fdata.max_level.max() ) + + assert rank not in process_stats.keys(), "statistics for process {} exists".format(rank) + process_stats[ rank ] = fdata + + +nrow, ncol = 3, 2 +fig = plt.figure(figsize=(5*ncol, 5*nrow)) + +# plot("cells", "Number of cells per level", "Level", "number of cells", ) + +ax = fig.add_subplot(nrow, ncol, 1) +plot_by_level( process_stats, "cells", ax, xlabel="level", ylabel="Number of cells" ) +ax.set_xlim( min_level-1, max_level+1 ) +ax.set_title( "Number of cells", fontweight="bold", fontsize=10 ) +x = numpy.arange(min_level, max_level+1) +ax.set_xticks( x, labels=x ) + +ax = fig.add_subplot(nrow, ncol, 2) +plot_by_level( process_stats, "axis-0.number of intervals", ax, xlabel="level", ylabel="Number of intervals" ) +ax.set_xlim( min_level-1, max_level+1 ) +ax.set_title( "Number of intervals", fontweight="bold", fontsize=10 ) +x = numpy.arange(min_level, max_level+1) +ax.set_xticks( x, labels=x ) + +ax = fig.add_subplot(nrow, ncol, 3) +plot_single_quantity( process_stats, "n_neighbours", ax, xlabel="MPI rank", ylabel="Number of neighbours" ) +ax.set_title( "Number of geometrical neighbours", fontweight="bold", fontsize=10 ) +# x = numpy.arange(min_level, max_level+1) +# ax.set_xticks( x, labels=x ) + +ax = fig.add_subplot(nrow, ncol, 4) +plot_mma( process_stats, "axis-0.cells per interval", ax, xlabel="level", ylabel="Number of cells" ) +ax.set_xlim( min_level-1, max_level+1 ) +ax.set_title( "Number of cells per interval x-axis", fontweight="bold", fontsize=10 ) +x = numpy.arange(min_level, max_level+1) +ax.set_xticks( x, labels=x ) + + +plt.savefig( f"{args.jtag}.png", dpi=300 ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6a4a5dd10..6de6d9666 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -28,6 +28,9 @@ set(SAMURAI_TESTS test_portion.cpp test_scaling.cpp test_utils.cpp + test_sfc.cpp + test_mrmesh.cpp + test_loadbalancing.cpp ) if(rapidcheck_FOUND) diff --git a/tests/main.cpp b/tests/main.cpp index a3344fda4..9767eb035 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -1,13 +1,15 @@ -#ifdef SAMURAI_WITH_MPI -#include -#endif #include +#include int main(int argc, char* argv[]) { -#ifdef SAMURAI_WITH_MPI - boost::mpi::environment env(argc, argv); -#endif + samurai::initialize(); + ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + + int ret = RUN_ALL_TESTS(); + + samurai::finalize(); + + return ret; } diff --git a/tests/test_adapt.cpp b/tests/test_adapt.cpp index 5ba4637e1..c56e1df2a 100644 --- a/tests/test_adapt.cpp +++ b/tests/test_adapt.cpp @@ -27,7 +27,7 @@ namespace samurai TYPED_TEST(adapt_test, mutliple_fields) { - ::samurai::initialize(); + // ::samurai::initialize(); static constexpr std::size_t dim = TypeParam::value; using config = MRConfig; @@ -38,6 +38,7 @@ namespace samurai auto adapt = make_MRAdapt(u_1, u_2, u_3); adapt(1e-4, 2); - ::samurai::finalize(); + + // ::samurai::finalize(); } } diff --git a/tests/test_cell.cpp b/tests/test_cell.cpp index 9893d47dd..49a00ef9c 100644 --- a/tests/test_cell.cpp +++ b/tests/test_cell.cpp @@ -50,4 +50,41 @@ namespace samurai xt::xarray expected{.5, .5}; EXPECT_EQ(c.corner(), expected); } -} + + /** + * Test of samurai::cell_length function (cell.hpp) + */ + TEST(cell, cell_length){ + EXPECT_DOUBLE_EQ(samurai::cell_length(1), 0.5); + EXPECT_DOUBLE_EQ(samurai::cell_length(3), 0.125); + EXPECT_DOUBLE_EQ(samurai::cell_length(20), 9.5367431640625e-07 ); + } + + /** + * Test of cell::face_center function (cell.hpp) + */ + TEST(cell, face_center){ + auto indices = xt::xtensor_fixed>({1, 1}); + samurai::Cell<2, Interval> c { 1, indices, 0 }; + + { + auto dir_x_p = xt::xtensor_fixed>({1, 0}); + auto fxp = c.face_center( dir_x_p ); + EXPECT_DOUBLE_EQ( fxp(0), 1. ); + } + + { + auto dir_x_m = xt::xtensor_fixed>({-1, 0}); + auto fxp = c.face_center( dir_x_m ); + EXPECT_DOUBLE_EQ( fxp(0), 0.5 ); + } + + { + auto dir_y_p = xt::xtensor_fixed>({0, 1}); + auto fxp = c.face_center( dir_y_p ); + EXPECT_DOUBLE_EQ( fxp(1), 1. ); + } + + } + +} \ No newline at end of file diff --git a/tests/test_loadbalancing.cpp b/tests/test_loadbalancing.cpp new file mode 100644 index 000000000..bec339199 --- /dev/null +++ b/tests/test_loadbalancing.cpp @@ -0,0 +1,149 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef SAMURAI_WITH_MPI +#include +#endif + +#include + +namespace samurai { + +#ifdef SAMURAI_WITH_MPI + /* + * test cmptLoad; + */ + TEST(loadBalance, cmptLoad){ + + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + + samurai::CellList cl; + cl[0][{0}].add_interval({0, 4}); + cl[0][{1}].add_interval({0, 1}); + cl[0][{1}].add_interval({3, 4}); + cl[1][{2}].add_interval({2, 6}); + cl[2][{6}].add_interval({4, 6}); + cl[2][{6}].add_interval({10, 12}); + + Mesh_t mesh( cl, 0, 2 ); + + auto ncells = cmptLoad( mesh ); + auto ninter = cmptLoad( mesh ); + + ASSERT_EQ( ncells, 14 ); + ASSERT_EQ( ninter, 6 ); + + } + + /** + * Be aware that the cmptInterface require 2:1 balance for this test + */ + TEST(loadBalance, cmptInterface){ + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + + samurai::CellList cl_a, cl_b; + + { + cl_a[0][{0}].add_interval({0, 4}); // level 0 + + cl_a[1][{2}].add_interval({0, 2}); // level 1 + cl_a[1][{3}].add_interval({0, 2}); // level 1 + cl_a[1][{2}].add_interval({6, 8}); // level 1 + cl_a[1][{3}].add_interval({6, 8}); // level 1 + + cl_a[1][{2}].add_interval({2, 6}); // level 1 + cl_a[2][{6}].add_interval({4, 6}); // level 2 + cl_a[2][{6}].add_interval({10, 12}); // level 2 + } + + { + // cl_b[0][{2}].add_interval({0, 4}); // level 0 + cl_b[0][{3}].add_interval({0, 4}); // level 0 + + cl_b[1][{4}].add_interval({0, 8}); // level 1 + cl_b[1][{5}].add_interval({0, 8}); // level 1 + + cl_b[1][{3}].add_interval({3, 5}); // level 1 + cl_b[2][{7}].add_interval({4, 6}); // level 2 + cl_b[2][{7}].add_interval({10, 12}); // level 2 + } + + Mesh_t mesh_a ( cl_a, 0, 2 ), mesh_b (cl_b, 0, 2 ); + + auto interface = cmptInterface( mesh_a, mesh_b ); + + ASSERT_EQ( interface.nb_cells(), 10 ); + + } + + TEST(loadBalance, cellExists){ + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + + samurai::CellList cl_a, cl_b; + + { + cl_a[0][{0}].add_interval({0, 4}); // level 0 + + cl_a[1][{2}].add_interval({0, 2}); // level 1 + cl_a[1][{3}].add_interval({0, 2}); // level 1 + cl_a[1][{2}].add_interval({6, 8}); // level 1 + cl_a[1][{3}].add_interval({6, 8}); // level 1 + + cl_a[1][{2}].add_interval({2, 6}); // level 1 + cl_a[2][{6}].add_interval({4, 6}); // level 2 + cl_a[2][{6}].add_interval({10, 12}); // level 2 + } + + Mesh_t mesh ( cl_a, 0, 2 ); + + { + xt::xtensor_fixed> ij = {{3, 2}}; + ASSERT_TRUE( samurai::cellExists( mesh, Mesh_t::mesh_id_t::cells, static_cast(1), ij ) ); + } + + { + xt::xtensor_fixed> ij = {{0, 1}}; + ASSERT_FALSE( samurai::cellExists( mesh, Mesh_t::mesh_id_t::cells, static_cast(0), ij ) ); + } + + { + xt::xtensor_fixed> ij = {{0, 2}}; + ASSERT_TRUE( samurai::cellExists( mesh, Mesh_t::mesh_id_t::reference, static_cast(0), ij ) ); + } + + auto coords = make_field("coordinates", mesh); + auto level_field = make_field("level", mesh); + for_each_cell(mesh[Mesh_t::mesh_id_t::reference], [&](auto& cell) { + if constexpr ( dim == 1 ) + { + coords[cell] = cell.indices[0]; + } + else + { + coords[cell] = cell.indices; + } + level_field[cell] = cell.level; + }); + + samurai::save( "./", "test-lb-cellExists", {true, true}, mesh, coords, level_field ); + + } + +#endif +} diff --git a/tests/test_mrmesh.cpp b/tests/test_mrmesh.cpp new file mode 100644 index 000000000..62f0f427f --- /dev/null +++ b/tests/test_mrmesh.cpp @@ -0,0 +1,245 @@ +#include + +#include +#include +#include +#include + +#include + +namespace samurai +{ + + template + inline samurai::CellList getInitState() + { + assert(dim == 2); + + samurai::CellList cl; + cl[0][{0}].add_interval({0, 4}); + cl[0][{1}].add_interval({0, 1}); + cl[0][{1}].add_interval({3, 4}); + cl[0][{2}].add_interval({0, 1}); + cl[0][{2}].add_interval({3, 4}); + cl[0][{3}].add_interval({0, 3}); + + cl[1][{2}].add_interval({2, 6}); + cl[1][{3}].add_interval({2, 6}); + cl[1][{4}].add_interval({2, 4}); + cl[1][{4}].add_interval({5, 6}); + cl[1][{5}].add_interval({2, 6}); + cl[1][{6}].add_interval({6, 8}); + cl[1][{7}].add_interval({6, 7}); + + cl[2][{8}].add_interval({8, 10}); + cl[2][{9}].add_interval({8, 10}); + cl[2][{14}].add_interval({14, 16}); + cl[2][{15}].add_interval({14, 16}); + + return cl; + } + + /* + * test MR Mesh; + */ + TEST(mrmesh, test_nbcells2D) + { + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + + Mesh_t mesh(getInitState(), 0, 2); + + samurai::save("./", "test_mrmesh_cells_and_ghosts", mesh); + + ASSERT_EQ(mesh.min_level(), 0); + ASSERT_EQ(mesh.max_level(), 2); + + /** + * Need fix, this does not work. + */ + // std::vector nCellPerLevel_withGhost = {36, 48, 32}; // including ghost + // for (size_t ilvl = mesh.min_level(); ilvl <= mesh.max_level(); ++ilvl) + // { + // std::cerr << "cells_and_ghosts [" << ilvl << "]: " << mesh.nb_cells(ilvl, samurai::MRMeshId::cells_and_ghosts) << std::endl; + // // ASSERT_EQ(mesh.nb_cells(ilvl), nCellPerLevel_withGhost[ilvl]); + // } + + std::vector nCellPerLevel_proj = {5, 2, 0}; // proj cells + for (size_t ilvl = mesh.min_level(); ilvl <= mesh.max_level(); ++ilvl) + { + // std::cerr << "proj_cells [" << ilvl << "]: " << mesh.nb_cells(ilvl, samurai::MRMeshId::proj_cells) << std::endl; + ASSERT_EQ(mesh.nb_cells(ilvl, samurai::MRMeshId::proj_cells), nCellPerLevel_proj[ilvl]); + } + + std::vector nCellPerLevel_leaves = {11, 18, 8}; // not including ghost + for (size_t ilvl = mesh.min_level(); ilvl <= mesh.max_level(); ++ilvl) + { + ASSERT_EQ(mesh.nb_cells(ilvl, samurai::MRMeshId::cells), nCellPerLevel_leaves[ilvl]); + } + } + + TEST(mrmesh, test_exist2D) + { + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + using Interval_t = typename Mesh_t::interval_t; + + Mesh_t mesh(getInitState(), 0, 2); + + Interval_t i{0, 3, 0}; + + // const auto val = mesh.exists(samurai::MRMeshId::cells, 1, i); + } + + TEST(mrmesh, test_merge) + { + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + using CellArray_t = Mesh_t::ca_type; + + samurai::CellList cl; + cl[0][{0}].add_interval({0, 4}); + cl[0][{1}].add_interval({0, 1}); + cl[0][{1}].add_interval({3, 4}); + cl[1][{2}].add_interval({2, 6}); + + Mesh_t mesh(cl, 0, 2); + ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 10); + + samurai::CellList to_add_cl; + to_add_cl[1][{3}].add_interval({2, 6}); + + CellArray_t to_add_ca = {to_add_cl, false}; + + mesh.merge(to_add_ca); + + // technically not sufficient to be sure the merge was done properly + ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 14); + } + + TEST(mrmesh, test_remove) + { + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + using CellArray_t = Mesh_t::ca_type; + + samurai::CellList cl; + cl[0][{0}].add_interval({0, 4}); + cl[0][{1}].add_interval({0, 1}); + cl[0][{1}].add_interval({3, 4}); + cl[1][{2}].add_interval({2, 6}); + cl[1][{3}].add_interval({2, 6}); + + Mesh_t mesh(cl, 0, 2); + ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 14); + + samurai::CellList to_rm_cl; + to_rm_cl[0][{1}].add_interval({3, 4}); + to_rm_cl[1][{3}].add_interval({2, 6}); + + CellArray_t to_rm_ca = {to_rm_cl, false}; + + mesh.remove(to_rm_ca); + + // technically not sufficient to be sure the remove was done properly + ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 9); + } + + TEST(mrmesh, test_construct_subdomain) + { + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + + samurai::CellList cl; + cl[0][{0}].add_interval({0, 4}); + cl[0][{1}].add_interval({0, 1}); + cl[0][{1}].add_interval({3, 4}); + cl[0][{2}].add_interval({0, 1}); + cl[0][{2}].add_interval({3, 4}); + + cl[1][{2}].add_interval({2, 6}); + cl[1][{3}].add_interval({2, 6}); + cl[1][{4}].add_interval({2, 4}); + cl[1][{4}].add_interval({5, 6}); + cl[1][{5}].add_interval({2, 6}); + cl[1][{6}].add_interval({6, 8}); + cl[1][{7}].add_interval({6, 7}); + + Mesh_t mesh( cl, 0, 1); + + bool intersect_found = false; + const auto & subdom = mesh.subdomain(); + for(std::size_t level=mesh.min_level(); level 0) { intersect_found = true; } + } + + ASSERT_FALSE( intersect_found ); + + } + + TEST(mrmesh, test_construct_union) + { + constexpr int dim = 2; + + using Config = samurai::MRConfig; + using Mesh_t = samurai::MRMesh; + using cl_t = Mesh_t::cl_type; + using ca_t = Mesh_t::ca_type; + + Mesh_t mesh( getInitState(), 0, 2); + + auto un = mesh.get_union(); + + ASSERT_EQ( un[ 0 ].nb_cells(), 5); + { + cl_t cl; + cl[0][{1}].add_interval({1,3}); + cl[0][{2}].add_interval({1,3}); + cl[0][{3}].add_interval({3,4}); + ca_t ca = { cl, false }; + auto tmp_ = samurai::difference( ca[0], un[0] ); + + size_t ni = 0; + tmp_( [&]([[maybe_unused]]const auto & interval, [[maybe_unused]]const auto & index){ + ni +=1; + }); + ASSERT_EQ( ni, 0); + } + + ASSERT_EQ( un[ 1 ].nb_cells(), 2); + { + cl_t cl; + cl[1][{4}].add_interval({4,5}); + cl[1][{7}].add_interval({7,8}); + ca_t ca = { cl, false }; + auto tmp_ = samurai::difference( ca[1], un[1] ); + + size_t ni = 0; + tmp_( [&]([[maybe_unused]]const auto & interval, [[maybe_unused]]const auto & index){ + ni +=1; + }); + ASSERT_EQ( ni, 0); + } + + ASSERT_EQ( un[ 2 ].nb_cells(), 0); + + + } + +} \ No newline at end of file diff --git a/tests/test_sfc.cpp b/tests/test_sfc.cpp new file mode 100644 index 000000000..e72ce4bb4 --- /dev/null +++ b/tests/test_sfc.cpp @@ -0,0 +1,190 @@ +#include +#include +#include + +#include +#include + +#include + +namespace samurai +{ + using Coord_2D_t = xt::xtensor_fixed>; + using Coord_3D_t = xt::xtensor_fixed>; + + /* + * test computation of morton indexes 2D + */ + TEST(sfc, morton2D_getKey) + { + constexpr int dim = 2; + + SFC morton; + + Coord_2D_t ij = {0, 0}; + auto key1 = morton.getKey(ij); + ASSERT_EQ(key1, 0); + + ij = {2, 1}; + auto key2 = morton.getKey(ij); + ASSERT_EQ(key2, 6); + + ij = {5, 6}; + auto key3 = morton.getKey(ij); + ASSERT_EQ(key3, 57); + + ij = {0, 4}; + auto key4 = morton.getKey(ij); + ASSERT_EQ(key4, 32); + + ij = {4, 0}; + auto key5 = morton.getKey(ij); + ASSERT_EQ(key5, 16); + } + + /* + * test computation of (i,j) logical coordinate from morton indexes 2D + */ + TEST(sfc, morton2D_getCoordinates) + { + constexpr int dim = 2; + + SFC morton; + + auto ij = morton.getCoordinates(static_cast(0)); + ASSERT_EQ(ij(0), 0); + ASSERT_EQ(ij(1), 0); + + auto ij2 = morton.getCoordinates(static_cast(31)); + ASSERT_EQ(ij2(0), 7); + ASSERT_EQ(ij2(1), 3); + + auto ij3 = morton.getCoordinates(static_cast(51)); + ASSERT_EQ(ij3(0), 5); + ASSERT_EQ(ij3(1), 5); + + auto ij4 = morton.getCoordinates(static_cast(39)); + ASSERT_EQ(ij4(0), 3); + ASSERT_EQ(ij4(1), 5); + } + + /* + * test computation of morton indexes 3D + */ + TEST(sfc, morton3D_getKey) + { + constexpr int dim = 3; + + SFC morton; + + Coord_3D_t ijk = {0, 0, 0}; + auto key1 = morton.getKey(ijk); + ASSERT_EQ(key1, 0); + + ijk = {1, 1, 0}; + auto key2 = morton.getKey(ijk); + ASSERT_EQ(key2, 3); + + ijk = {0, 0, 1}; + auto key3 = morton.getKey(ijk); + ASSERT_EQ(key3, 4); + + ijk = {5, 9, 1}; + auto key4 = morton.getKey(ijk); + ASSERT_EQ(key4, 1095); + } + + /* + * test computation of (i,j,k) logical coordinates from morton indexes 3D + */ + TEST(sfc, morton3D_getCoordinates) + { + constexpr int dim = 3; + + SFC morton; + + auto ijk = morton.getCoordinates(static_cast(1095)); + ASSERT_EQ(ijk(0), 5); + ASSERT_EQ(ijk(1), 9); + ASSERT_EQ(ijk(2), 1); + } + + /* + * test computation of hilbert key for 2D + */ + TEST(sfc, hilbert2D_getKey) + { + constexpr int dim = 2; + + SFC hilbert; + + Coord_2D_t ij = {0, 0}; + ASSERT_EQ(hilbert.getKey(ij), static_cast(0)); + + ij = {1, 1}; + ASSERT_EQ(hilbert.getKey(ij), static_cast(2)); + + ij = {1, 2}; + ASSERT_EQ(hilbert.getKey(ij), static_cast(7)); + + ij = {3, 1}; + ASSERT_EQ(hilbert.getKey(ij), static_cast(12)); + + ij = {3, 4}; + ASSERT_EQ(hilbert.getKey(ij), static_cast(53)); + + ij = {3, 5}; + ASSERT_EQ(hilbert.getKey(ij), static_cast(52)); + + ij = {3, 6}; + ASSERT_EQ(hilbert.getKey(ij), static_cast(51)); + } + + /* + * test computation of hilbert key & back to {i,j,k} + */ + // TEST(tests_hilbert, test_consistency_hilbert3D){ + + // { + // Logical_Pos_t pt1 {0, 0 ,0}; + // auto hk1 = getHilbertKey3D( pt1, 0 ); + // auto pt1_r = getLogicalFromHilberKey3D( hk1, 0 ); + // ASSERT_EQ( pt1.i, pt1_r.i ); + // ASSERT_EQ( pt1.j, pt1_r.j ); + // ASSERT_EQ( pt1.k, pt1_r.k ); + // } + + // { + // Logical_Pos_t pt1 {2, 3, 1}; + // auto hk1 = getHilbertKey3D( pt1, 2 ); + // auto pt1_r = getLogicalFromHilberKey3D( hk1, 2 ); + // ASSERT_EQ( pt1.i, pt1_r.i ); + // ASSERT_EQ( pt1.j, pt1_r.j ); + // ASSERT_EQ( pt1.k, pt1_r.k ); + // } + + // { + // Logical_Pos_t pt1 {8, 14, 28}; + // auto hk1 = getHilbertKey3D( pt1, 6 ); + // auto pt1_r = getLogicalFromHilberKey3D( hk1, 6 ); + // ASSERT_EQ( pt1.i, pt1_r.i ); + // ASSERT_EQ( pt1.j, pt1_r.j ); + // ASSERT_EQ( pt1.k, pt1_r.k ); + // } + + // } + + /* + * test computation of morton indexes 3D + */ + // TEST(tests_hilbert, test_consistency_hilbert3D_old){ + + // { + // auto hk1 = get_hilbert_key_3D( {0, 0 ,0}, 0 ); + // auto pt1_r = get_logical_from_hilbert_3D( hk1, 0, 3); + // ASSERT_EQ( 0, pt1_r[0] ); + // ASSERT_EQ( 0, pt1_r[1] ); + // ASSERT_EQ( 0, pt1_r[2] ); + // } + +} \ No newline at end of file