diff --git a/demos/CMakeLists.txt b/demos/CMakeLists.txt
index 886c62108..c4dc0d812 100644
--- a/demos/CMakeLists.txt
+++ b/demos/CMakeLists.txt
@@ -3,8 +3,9 @@ find_package(CLI11)
 
 add_subdirectory(from_obj)
 add_subdirectory(FiniteVolume)
+add_subdirectory(loadbalancing)
 # add_subdirectory(MPI)
-# add_subdirectory(LBM)
+add_subdirectory(LBM)
 add_subdirectory(p4est)
 add_subdirectory(pablo)
 add_subdirectory(tutorial)
diff --git a/demos/FiniteVolume/CMakeLists.txt b/demos/FiniteVolume/CMakeLists.txt
index e1d8ba6c0..a5f41a5b8 100644
--- a/demos/FiniteVolume/CMakeLists.txt
+++ b/demos/FiniteVolume/CMakeLists.txt
@@ -27,6 +27,10 @@ if(PETSC_FOUND)
     target_link_libraries(manual_block_matrix_assembly samurai CLI11::CLI11 ${PETSC_LINK_LIBRARIES} ${MPI_LIBRARIES})
 endif()
 
+add_executable(finite-volume-linear-convection linear_convection.cpp)
+target_link_libraries(finite-volume-linear-convection samurai CLI11::CLI11 ${MPI_LIBRARIES})
+
+
 add_executable(finite-volume-amr-burgers-hat AMR_Burgers_Hat.cpp)
 target_link_libraries(finite-volume-amr-burgers-hat samurai CLI11::CLI11)
 
@@ -53,8 +57,6 @@ target_link_libraries(finite-volume-advection-2d-user-bc samurai CLI11::CLI11)
 add_executable(finite-volume-scalar-burgers-2d scalar_burgers_2d.cpp)
 target_link_libraries(finite-volume-scalar-burgers-2d samurai CLI11::CLI11)
 
-add_executable(finite-volume-linear-convection linear_convection.cpp)
-target_link_libraries(finite-volume-linear-convection samurai CLI11::CLI11)
 
 add_executable(finite-volume-burgers burgers.cpp)
 target_link_libraries(finite-volume-burgers samurai CLI11::CLI11)
diff --git a/demos/FiniteVolume/advection_2d.cpp b/demos/FiniteVolume/advection_2d.cpp
index 4b6c85d0f..76fa6a5f3 100644
--- a/demos/FiniteVolume/advection_2d.cpp
+++ b/demos/FiniteVolume/advection_2d.cpp
@@ -15,6 +15,16 @@
 #include <samurai/stencil_field.hpp>
 #include <samurai/subset/subset_op.hpp>
 
+#include <samurai/load_balancing.hpp>
+#include <samurai/load_balancing_sfc.hpp>
+#include <samurai/load_balancing_sfc_w.hpp>
+#include <samurai/load_balancing_diffusion.hpp>
+#include <samurai/load_balancing_force.hpp>
+#include <samurai/load_balancing_diffusion_interval.hpp>
+#include <samurai/load_balancing_void.hpp>
+
+#include <samurai/timers.hpp>
+
 #include <filesystem>
 namespace fs = std::filesystem;
 
@@ -23,11 +33,13 @@ auto init(Mesh& mesh)
 {
     auto u = samurai::make_field<double, 1>("u", mesh);
 
+    u.fill( 0. );
+
     samurai::for_each_cell(
         mesh,
         [&](auto& cell)
         {
-            auto center           = cell.center();
+            auto center = cell.center();
             const double radius   = .2;
             const double x_center = 0.3;
             const double y_center = 0.3;
@@ -38,7 +50,7 @@ auto init(Mesh& mesh)
             else
             {
                 u[cell] = 0;
-            }
+	    }
         });
 
     return u;
@@ -146,21 +158,26 @@ void flux_correction(double dt, const std::array<double, 2>& a, const Field& u,
 template <class Field>
 void save(const fs::path& path, const std::string& filename, const Field& u, const std::string& suffix = "")
 {
-    auto mesh   = u.mesh();
-    auto level_ = samurai::make_field<std::size_t, 1>("level", mesh);
+    auto mesh    = u.mesh();
+    auto level_  = samurai::make_field<std::size_t, 1>("level", mesh);
+    auto domain_ = samurai::make_field<int, 1>("domain", mesh);
 
     if (!fs::exists(path))
     {
         fs::create_directory(path);
     }
 
+    int mrank = 0;
+
     samurai::for_each_cell(mesh,
                            [&](const auto& cell)
                            {
-                               level_[cell] = cell.level;
+                               level_[cell]  = cell.level;
+                               domain_[cell] = mrank;
                            });
 #ifdef SAMURAI_WITH_MPI
     mpi::communicator world;
+    mrank = world.rank();
     samurai::save(path, fmt::format("{}_size_{}{}", filename, world.size(), suffix), mesh, u, level_);
 #else
     samurai::save(path, fmt::format("{}{}", filename, suffix), mesh, u, level_);
@@ -175,6 +192,7 @@ int main(int argc, char* argv[])
     using Config              = samurai::MRConfig<dim>;
 
     // Simulation parameters
+    double radius = 0.2, x_center = 0.3, y_center = 0.3;
     xt::xtensor_fixed<double, xt::xshape<dim>> min_corner = {0., 0.};
     xt::xtensor_fixed<double, xt::xshape<dim>> max_corner = {1., 1.};
     std::array<double, dim> a{
@@ -194,6 +212,7 @@ int main(int argc, char* argv[])
     fs::path path        = fs::current_path();
     std::string filename = "FV_advection_2d";
     std::size_t nfiles   = 1;
+    std::size_t nt_loadbalance = 1; // nombre d'iteration entre les equilibrages
 
     app.add_option("--min-corner", min_corner, "The min corner of the box")->capture_default_str()->group("Simulation parameters");
     app.add_option("--max-corner", max_corner, "The max corner of the box")->capture_default_str()->group("Simulation parameters");
@@ -202,6 +221,7 @@ int main(int argc, char* argv[])
     app.add_option("--Tf", Tf, "Final time")->capture_default_str()->group("Simulation parameters");
     app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution");
     app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
     app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh")
         ->capture_default_str()
         ->group("Multiresolution");
@@ -227,19 +247,57 @@ int main(int argc, char* argv[])
     double t             = 0.;
 
     auto u = init(mesh);
+
+
     samurai::make_bc<samurai::Dirichlet<1>>(u, 0.);
     auto unp1 = samurai::make_field<double, 1>("unp1", mesh);
 
     auto MRadaptation = samurai::make_MRAdapt(u);
+
+    samurai::times::timers.start("MRadaptation");
     MRadaptation(mr_epsilon, mr_regularity);
+    samurai::times::timers.stop("MRadaptation");
+
     save(path, filename, u, "_init");
 
     std::size_t nsave = 1;
     std::size_t nt    = 0;
 
+
+// For now, void_balancer is verified and works properly
+// Diffusion_LoadBalancer_cell not exist ???
+// Load_balancing::Diffusion donne de très mauvais resultats, peut-etre des parametres internes ? 
+
+    SFC_LoadBalancer_interval<dim, Morton> balancer;
+//    Void_LoadBalancer<dim> balancer;
+//    Diffusion_LoadBalancer_cell<dim> balancer;
+//     Diffusion_LoadBalancer_interval<dim> balancer;
+//     Load_balancing::Diffusion balancer;
+    // Load_balancing::SFCw<dim, Morton> balancer;
+
+    std::ofstream logs;
+#ifdef SAMURAI_WITH_MPI    
+    boost::mpi::communicator world;
+    logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app );
+#endif
     while (t != Tf)
     {
+        bool reqBalance = balancer.require_balance( mesh );
+
+        if( reqBalance ) std::cerr << "\t> Load Balancing required !!! " << std::endl;
+
+        // if ( ( nt % nt_loadbalance == 0 || reqBalance ) && nt > 1 )
+        if ( ( nt % nt_loadbalance == 0 ) && nt > 1 )
+        // if ( reqBalance && nt > 1 )
+        {
+            samurai::times::timers.start("load-balancing");
+            balancer.load_balance(mesh, u);
+            samurai::times::timers.stop("load-balancing");
+        }
+
+        samurai::times::timers.start("MRadaptation");
         MRadaptation(mr_epsilon, mr_regularity);
+        samurai::times::timers.stop("MRadaptation");
 
         t += dt;
         if (t > Tf)
@@ -251,8 +309,13 @@ int main(int argc, char* argv[])
         std::cout << fmt::format("iteration {}: t = {}, dt = {}", nt++, t, dt) << std::endl;
 
         samurai::update_ghost_mr(u);
+
         unp1.resize();
+
+        samurai::times::timers.start("upwind");
         unp1 = u - dt * samurai::upwind(a, u);
+        samurai::times::timers.stop("upwind");
+
         if (correction)
         {
             flux_correction(dt, a, u, unp1);
@@ -265,7 +328,9 @@ int main(int argc, char* argv[])
             const std::string suffix = (nfiles != 1) ? fmt::format("_ite_{}", nsave++) : "";
             save(path, filename, u, suffix);
         }
+
     }
+
     samurai::finalize();
     return 0;
 }
diff --git a/demos/FiniteVolume/linear_convection.cpp b/demos/FiniteVolume/linear_convection.cpp
index 910739f03..1c3b4e380 100644
--- a/demos/FiniteVolume/linear_convection.cpp
+++ b/demos/FiniteVolume/linear_convection.cpp
@@ -7,6 +7,20 @@
 #include <samurai/samurai.hpp>
 #include <samurai/schemes/fv.hpp>
 
+#include <samurai/load_balancing.hpp>
+#include <samurai/load_balancing_sfc.hpp>
+#include <samurai/load_balancing_diffusion.hpp>
+#include <samurai/load_balancing_force.hpp>
+#include <samurai/load_balancing_diffusion_interval.hpp>
+#include <samurai/load_balancing_void.hpp>
+#include <samurai/load_balancing_life.hpp>
+
+#include <samurai/timers.hpp>
+
+#ifdef WITH_STATS
+#include "samurai/statistics.hpp"
+#endif
+
 #include <filesystem>
 namespace fs = std::filesystem;
 
@@ -44,7 +58,9 @@ int main(int argc, char* argv[])
     //--------------------//
     // Program parameters //
     //--------------------//
-
+#ifdef SAMURAI_WITH_MPI    
+    boost::mpi::communicator world;
+#endif
     // Simulation parameters
     double left_box  = -1;
     double right_box = 1;
@@ -64,6 +80,7 @@ int main(int argc, char* argv[])
     fs::path path        = fs::current_path();
     std::string filename = "linear_convection_" + std::to_string(dim) + "D";
     std::size_t nfiles   = 0;
+    std::size_t nt_loadbalance = 10;
 
     app.add_option("--left", left_box, "The left border of the box")->capture_default_str()->group("Simulation parameters");
     app.add_option("--right", right_box, "The right border of the box")->capture_default_str()->group("Simulation parameters");
@@ -72,6 +89,7 @@ int main(int argc, char* argv[])
     app.add_option("--cfl", cfl, "The CFL")->capture_default_str()->group("Simulation parameters");
     app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution");
     app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
     app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh")
         ->capture_default_str()
         ->group("Multiresolution");
@@ -96,31 +114,40 @@ int main(int argc, char* argv[])
     box_corner2.fill(right_box);
     Box box(box_corner1, box_corner2);
     std::array<bool, dim> periodic;
-    periodic.fill(true);
+    periodic.fill(false);
+    
+    samurai::times::timers.start("init");
     samurai::MRMesh<Config> mesh{box, min_level, max_level, periodic};
 
     // Initial solution
-    auto u = samurai::make_field<1>("u",
+    auto u = samurai::make_field<double, 1>("u",
                                     mesh,
-                                    [](const auto& coords)
+                                    [&](const auto& coords)
                                     {
                                         if constexpr (dim == 1)
                                         {
                                             const auto& x = coords(0);
                                             return (x >= -0.8 && x <= -0.3) ? 1. : 0.;
                                         }
-                                        else
+                                        else if ( dim == 2 )
                                         {
                                             const auto& x = coords(0);
                                             const auto& y = coords(1);
                                             return (x >= -0.8 && x <= -0.3 && y >= 0.3 && y <= 0.8) ? 1. : 0.;
                                         }
+
                                     });
+    samurai::times::timers.stop("init");
 
-    auto unp1 = samurai::make_field<1>("unp1", mesh);
+    auto unp1 = samurai::make_field<double, 1>("unp1", mesh);
     // Intermediary fields for the RK3 scheme
-    auto u1 = samurai::make_field<1>("u1", mesh);
-    auto u2 = samurai::make_field<1>("u2", mesh);
+    auto u1 = samurai::make_field<double, 1>("u1", mesh);
+    auto u2 = samurai::make_field<double, 1>("u2", mesh);
+
+    samurai::make_bc<samurai::Dirichlet<1>>(u, 0.);
+    samurai::make_bc<samurai::Dirichlet<1>>(unp1, 0.);
+    samurai::make_bc<samurai::Dirichlet<1>>(u1, 0.);
+    samurai::make_bc<samurai::Dirichlet<1>>(u2, 0.); 
 
     unp1.fill(0);
     u1.fill(0);
@@ -129,12 +156,19 @@ int main(int argc, char* argv[])
     // Convection operator
     samurai::VelocityVector<dim> velocity;
     velocity.fill(1);
-    if constexpr (dim == 2)
-    {
-        velocity(1) = -1;
-    }
+    velocity(1) = -1 ; 
+
+    // origin weno5
     auto conv = samurai::make_convection_weno5<decltype(u)>(velocity);
 
+    // SFC_LoadBalancer_interval<dim, Morton> balancer;
+    // Load_balancing::Life balancer;
+//    Load_balancing::GlobalCriteria balancer;
+    // Void_LoadBalancer<dim> balancer;
+    // Diffusion_LoadBalancer_cell<dim> balancer;
+    // Diffusion_LoadBalancer_interval<dim> balancer;
+    // Load_balancing::Diffusion balancer;
+
     //--------------------//
     //   Time iteration   //
     //--------------------//
@@ -148,7 +182,10 @@ int main(int argc, char* argv[])
     }
 
     auto MRadaptation = samurai::make_MRAdapt(u);
+    
+    samurai::times::timers.start("MRadaptation");
     MRadaptation(mr_epsilon, mr_regularity);
+    samurai::times::timers.stop("MRadaptation");
 
     double dt_save    = nfiles == 0 ? dt : Tf / static_cast<double>(nfiles);
     std::size_t nsave = 0, nt = 0;
@@ -158,9 +195,20 @@ int main(int argc, char* argv[])
         save(path, filename, u, suffix);
     }
 
+    samurai::times::timers.start("tloop");
     double t = 0;
     while (t != Tf)
     {
+
+/**
+	if (nt % nt_loadbalance == 0 && nt > 1 )
+        {
+            samurai::times::timers.start("tloop.lb:"+balancer.getName());
+            balancer.load_balance(mesh, u);
+            samurai::times::timers.stop("tloop.lb:"+balancer.getName());
+
+        }
+ **/     
         // Move to next timestep
         t += dt;
         if (t > Tf)
@@ -168,30 +216,44 @@ int main(int argc, char* argv[])
             dt += Tf - t;
             t = Tf;
         }
-        std::cout << fmt::format("iteration {}: t = {:.2f}, dt = {}", nt++, t, dt) << std::flush;
+
+        std::cout << fmt::format("iteration {}: t = {:.2f}, dt = {}", nt++, t, dt) << std::flush << std::endl;
 
         // Mesh adaptation
+        samurai::times::timers.start("tloop.MRadaptation");
         MRadaptation(mr_epsilon, mr_regularity);
+        samurai::times::timers.stop("tloop.MRadaptation");
+
+        samurai::times::timers.start("tloop.ugm");
         samurai::update_ghost_mr(u);
+        samurai::times::timers.stop("tloop.ugm");
+
+        samurai::times::timers.start("tloop.resize_fill");
         unp1.resize();
+        unp1.fill(0);
+
         u1.resize();
         u2.resize();
         u1.fill(0);
         u2.fill(0);
+        samurai::times::timers.stop("tloop.resize_fill");
 
         // unp1 = u - dt * conv(u);
 
         // TVD-RK3 (SSPRK3)
+        samurai::times::timers.start("tloop.RK3");
         u1 = u - dt * conv(u);
         samurai::update_ghost_mr(u1);
         u2 = 3. / 4 * u + 1. / 4 * (u1 - dt * conv(u1));
         samurai::update_ghost_mr(u2);
         unp1 = 1. / 3 * u + 2. / 3 * (u2 - dt * conv(u2));
+        samurai::times::timers.stop("tloop.RK3");
 
         // u <-- unp1
         std::swap(u.array(), unp1.array());
 
         // Save the result
+        samurai::times::timers.start("tloop.io");
         if (nfiles == 0 || t >= static_cast<double>(nsave + 1) * dt_save || t == Tf)
         {
             if (nfiles != 1)
@@ -204,9 +266,10 @@ int main(int argc, char* argv[])
                 save(path, filename, u);
             }
         }
+        samurai::times::timers.stop("tloop.io");
 
-        std::cout << std::endl;
     }
+    samurai::times::timers.stop("tloop");
 
     if constexpr (dim == 1)
     {
diff --git a/demos/FiniteVolume/linear_convection_ponio.cpp b/demos/FiniteVolume/linear_convection_ponio.cpp
new file mode 100644
index 000000000..2e4594c27
--- /dev/null
+++ b/demos/FiniteVolume/linear_convection_ponio.cpp
@@ -0,0 +1,278 @@
+// Copyright 2018-2024 the samurai's authors
+// SPDX-License-Identifier:  BSD-3-Clause
+#include <CLI/CLI.hpp>
+
+#include <samurai/hdf5.hpp>
+#include <samurai/mr/adapt.hpp>
+#include <samurai/mr/mesh.hpp>
+#include <samurai/schemes/fv.hpp>
+#include <samurai/samurai.hpp>
+
+#include <samurai/load_balancing.hpp>
+#include <samurai/load_balancing_sfc.hpp>
+#include <samurai/load_balancing_diffusion.hpp>
+#include <samurai/load_balancing_force.hpp>
+#include <samurai/load_balancing_diffusion_interval.hpp>
+#include <samurai/load_balancing_void.hpp>
+#include <samurai/load_balancing_life.hpp>
+
+#include <samurai/timers.hpp>
+
+#include "ponio/runge_kutta.hpp"
+#include "ponio/solver.hpp"
+
+#ifdef WITH_STATS
+#include "samurai/statistics.hpp"
+#endif
+
+#include <filesystem>
+namespace fs = std::filesystem;
+
+template <std::size_t dim>
+double exact_solution(xt::xtensor_fixed<double, xt::xshape<dim>> coords, double t)
+{
+    const double a  = 1;
+    const double b  = 0;
+    const double& x = coords(0);
+    return (a * x + b) / (a * t + 1);
+}
+
+template <class Field>
+void save(const fs::path& path, const std::string& filename, Field& u, const std::string& suffix = "")
+{
+    auto mesh   = u.mesh();
+    u.name()      = "u";
+    auto level_ = samurai::make_field<std::size_t, 1>("level", mesh);
+
+    if (!fs::exists(path))
+    {
+        fs::create_directory(path);
+    }
+
+    samurai::for_each_cell(mesh,
+                           [&](const auto& cell)
+                           {
+                               level_[cell] = cell.level;
+                           });
+
+    samurai::save(path, fmt::format("{}{}", filename, suffix), mesh, u, level_);
+}
+
+int main(int argc, char* argv[])
+{
+    samurai::initialize(argc, argv);
+
+    static constexpr std::size_t dim = 2;
+    using Config                     = samurai::MRConfig<dim, 3>;
+    using Box                        = samurai::Box<double, dim>;
+    using point_t                    = typename Box::point_t;
+
+    std::cout << "------------------------- Linear convection -------------------------" << std::endl;
+
+    //--------------------//
+    // Program parameters //
+    //--------------------//
+    boost::mpi::communicator world;
+
+    // Simulation parameters
+    double left_box      = 0;
+    double right_box     = 4;
+    std::string init_sol = "hat";
+
+    // Time integration
+    double Tf  = 3;
+    double dt  = 0;
+    double cfl = 0.95;
+
+    // Multiresolution parameters
+    std::size_t min_level = 0;
+    std::size_t max_level = dim == 1 ? 5 : 3;
+    double mr_epsilon     = 1e-4; // Threshold used by multiresolution
+    double mr_regularity  = 1.;   // Regularity guess for multiresolution
+
+    // Output parameters
+    fs::path path        = fs::current_path();
+    std::string filename = "linear_convection_" + std::to_string(dim) + "D";
+    std::size_t nfiles   = 0;
+    std::size_t nt_loadbalance = 10;
+
+    CLI::App app{"Finite volume example for the heat equation in 1d"};
+    app.add_option("--left", left_box, "The left border of the box")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--right", right_box, "The right border of the box")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--init-sol", init_sol, "Initial solution: hat/linear/bands")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--Tf", Tf, "Final time")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--dt", dt, "Time step")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--cfl", cfl, "The CFL")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh")
+        ->capture_default_str()
+        ->group("Multiresolution");
+    app.add_option("--mr-reg",
+                   mr_regularity,
+                   "The regularity criteria used by the multiresolution to "
+                   "adapt the mesh")
+        ->capture_default_str()
+        ->group("Multiresolution");
+    app.add_option("--path", path, "Output path")->capture_default_str()->group("Ouput");
+    app.add_option("--filename", filename, "File name prefix")->capture_default_str()->group("Ouput");
+    app.add_option("--nfiles", nfiles, "Number of output files")->capture_default_str()->group("Ouput");
+    app.allow_extras();
+    CLI11_PARSE(app, argc, argv);
+
+    //--------------------//
+    // Problem definition //
+    //--------------------//
+
+    point_t box_corner1, box_corner2;
+    box_corner1.fill(left_box);
+    box_corner2.fill(right_box);
+    Box box(box_corner1, box_corner2);
+    std::array<bool, dim> periodic;
+    periodic.fill(false);
+    
+    samurai::times::timers.start("init");
+    samurai::MRMesh<Config> mesh{box, min_level, max_level, periodic};
+
+    // Initial solution
+    auto u = samurai::make_field<double, 1>("u",
+                                    mesh,
+                                    [&](const auto& coords)
+                                    {
+                                        if constexpr (dim == 1)
+                                        {
+                                            auto& x = coords(0);
+                                            return (x >= -0.8 && x <= -0.3) ? 1. : 0.;
+                                        }
+                                        else if ( dim == 2 )
+                                        {
+                                            auto& x = coords(0);
+                                            auto& y = coords(1);
+                                            return (x <= 0.8 && x >= 0.3 && y >= 0.3 && y <= 0.8) ? 1. : 0.;
+                                        }else {
+                                            auto & x = coords( 0 );
+                                            auto & y = coords( 1 );
+                                            auto & z = coords( 2 );
+                                            return (x <= 0.8 && x >= 0.3 && y >= 0.3 && y <= 0.8 && z >= 0.3 && z <= 0.8 ) ? 1. : 0.;
+                                        }
+
+                                    });
+    samurai::times::timers.stop("init");
+
+    samurai::make_bc<samurai::Dirichlet<1>>(u, 0.);
+
+    // Convection operator
+    samurai::VelocityVector<dim> velocity;
+    velocity.fill(1);
+
+    // origin weno5
+    auto conv = samurai::make_convection_weno5<decltype(u)>(velocity);
+
+    auto ponio_f = [&]([[maybe_unused]]double t, auto && u){
+        samurai::make_bc<samurai::Dirichlet<1>>(u, 0.);
+        samurai::update_ghost_mr( u );
+        return - conv( u );
+    };
+
+    // SFC_LoadBalancer_interval<dim, Morton> balancer;
+    // Load_balancing::Life balancer;
+    // Void_LoadBalancer<dim> balancer;
+    Diffusion_LoadBalancer_cell<dim> balancer;
+    // Diffusion_LoadBalancer_interval<dim> balancer;
+    // Load_balancing::Diffusion balancer;
+
+    //--------------------//
+    //   Time iteration   //
+    //--------------------//
+
+    if (dt == 0)
+    {
+        double dx             = samurai::cell_length(max_level);
+        auto a                = xt::abs(velocity);
+        double sum_velocities = xt::sum(xt::abs(velocity))();
+        dt                    = cfl * dx / sum_velocities;
+    }
+
+    auto sol_range = ponio::make_solver_range( ponio_f, ponio::runge_kutta::rk_33(), u, {0.0, Tf}, dt );
+
+    auto it = sol_range.begin();
+
+    auto MRadaptation = samurai::make_MRAdapt( it->state );
+    
+    samurai::times::timers.start("MRadaptation");
+    MRadaptation(mr_epsilon, mr_regularity);
+    samurai::times::timers.stop("MRadaptation");
+
+    double dt_save    = nfiles == 0 ? dt : Tf / static_cast<double>(nfiles);
+    std::size_t nsave = 0, nt = 0;
+    if (nfiles != 1)
+    {
+        std::string suffix = (nfiles != 1) ? fmt::format("_ite_{}", nsave++) : "";
+        save(path, filename, it->state, suffix);
+    }
+
+    samurai::times::timers.start("tloop");
+
+    while ( it->time != Tf)
+    {
+
+        if ( balancer.require_balance( mesh ) )
+        {
+            samurai::times::timers.start("tloop.lb:"+balancer.getName());
+            balancer.load_balance(mesh, it->state);
+            samurai::times::timers.stop("tloop.lb:"+balancer.getName());
+        }
+        std::cout << fmt::format("iteration {}: t = {:.2f}, dt = {}", nt++, it->time, it->time_step) << std::endl;
+
+        // Mesh adaptation
+        samurai::times::timers.start("tloop.MRadaptation");
+        MRadaptation(mr_epsilon, mr_regularity);
+        samurai::times::timers.stop("tloop.MRadaptation");
+
+        samurai::times::timers.start("tloop.ugm");
+        samurai::update_ghost_mr( it->state );
+        samurai::times::timers.stop("tloop.ugm");
+
+        samurai::times::timers.start("tloop.resize_fill");
+        for ( auto& ki : it.meth.kis )
+        {
+            ki.resize();
+            ki.fill( 0. );
+        }
+        samurai::times::timers.stop("tloop.resize_fill");
+
+        samurai::times::timers.start("tloop.scheme");
+        ++it;
+        samurai::times::timers.stop("tloop.scheme");
+
+        // Save the result
+        samurai::times::timers.start("tloop.io");
+        if (nfiles == 0 || it->time >= static_cast<double>(nsave + 1) * dt_save || it->time == Tf)
+        {
+            if (nfiles != 1)
+            {
+                std::string suffix = (nfiles != 1) ? fmt::format("_ite_{}", nsave++) : "";
+                save(path, filename, it->state, suffix);
+            }
+            else
+            {
+                save(path, filename, it->state);
+            }
+        }
+        samurai::times::timers.stop("tloop.io");
+
+    }
+    samurai::times::timers.stop("tloop");
+
+    if constexpr (dim == 1)
+    {
+        std::cout << std::endl;
+        std::cout << "Run the following command to view the results:" << std::endl;
+        std::cout << "python <<path to samurai>>/python/read_mesh.py " << filename << "_ite_ --field u level --start 0 --end " << nsave
+                  << std::endl;
+    }
+
+    samurai::finalize();
+    return 0;
+}
diff --git a/demos/LBM/CMakeLists.txt b/demos/LBM/CMakeLists.txt
index 71d959b9d..2eb421061 100644
--- a/demos/LBM/CMakeLists.txt
+++ b/demos/LBM/CMakeLists.txt
@@ -30,7 +30,7 @@ target_link_libraries(lbm-D1Q222-euler-sod samurai)
 # 2D Paper
 
 add_executable(lbm-D2Q4444-euler-lax-liu D2Q4444_Euler_Lax_Liu.cpp)
-target_link_libraries(lbm-D2Q4444-euler-lax-liu samurai)
+target_link_libraries(lbm-D2Q4444-euler-lax-liu samurai CLI11::CLI11 ${MPI_LIBRARIES})
 
 add_executable(lbm-D2Q4444-euler-lax-liu-uniform D2Q4444_Euler_Lax_Liu_uniform.cpp)
 target_link_libraries(lbm-D2Q4444-euler-lax-liu-uniform samurai)
diff --git a/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp b/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp
index 5cb94fcef..8ec94cd52 100644
--- a/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp
+++ b/demos/LBM/D2Q4444_Euler_Lax_Liu.cpp
@@ -4,44 +4,32 @@
 #include <math.h>
 #include <vector>
 
-#include <cxxopts.hpp>
+#include <CLI/CLI.hpp>
 
+#include <samurai/bc.hpp>
 #include <samurai/field.hpp>
 #include <samurai/hdf5.hpp>
 #include <samurai/mr/adapt.hpp>
-#include <samurai/mr/mesh_with_overleaves.hpp>
+#include <samurai/mr/mesh.hpp>
+#include <samurai/reconstruction.hpp>
 #include <samurai/samurai.hpp>
-#include <samurai/statistics.hpp>
 
-#include "boundary_conditions.hpp"
-#include "prediction_map_2d.hpp"
-
-#include "utils_lbm_mr_2d.hpp"
-
-/// Timer used in tic & toc
-auto tic_timer = std::chrono::high_resolution_clock::now();
-
-/// Launching the timer
-void tic()
-{
-    tic_timer = std::chrono::high_resolution_clock::now();
-}
-
-/// Stopping the timer and returning the duration in seconds
-double toc()
-{
-    const auto toc_timer                          = std::chrono::high_resolution_clock::now();
-    const std::chrono::duration<double> time_span = toc_timer - tic_timer;
-    return time_span.count();
-}
+#include <samurai/load_balancing.hpp>
+#include <samurai/load_balancing_sfc.hpp>
+#include <samurai/load_balancing_diffusion.hpp>
+#include <samurai/load_balancing_force.hpp>
+#include <samurai/load_balancing_diffusion_interval.hpp>
+#include <samurai/load_balancing_void.hpp>
+#include <samurai/load_balancing_life.hpp>
+#include <samurai/timers.hpp>
 
 double gm = 1.4; // Gas constant
 
 template <class Config>
-auto init_f(samurai::MROMesh<Config>& mesh, int config, double lambda)
+auto init_f(samurai::MRMesh<Config>& mesh, int config, double lambda)
 {
     constexpr std::size_t nvel = 16;
-    using mesh_id_t            = typename samurai::MROMesh<Config>::mesh_id_t;
+    using mesh_id_t            = typename samurai::MRMesh<Config>::mesh_id_t;
 
     auto f = samurai::make_field<double, nvel>("f", mesh);
     f.fill(0);
@@ -192,79 +180,8 @@ auto init_f(samurai::MROMesh<Config>& mesh, int config, double lambda)
     return f;
 }
 
-template <class coord_index_t>
-auto compute_prediction(std::size_t min_level, std::size_t max_level)
-{
-    coord_index_t i = 0, j = 0;
-    std::vector<std::vector<prediction_map<coord_index_t>>> data(max_level - min_level + 1);
-
-    auto rotation_of_pi_over_two = [](int alpha, int k, int h)
-    {
-        // Returns the rotation of (k, h) of an angle alpha * pi / 2.
-        // All the operations are performed on integer, to be exact
-        int cosinus = static_cast<int>(std::round(std::cos(alpha * M_PI / 2.)));
-        int sinus   = static_cast<int>(std::round(std::sin(alpha * M_PI / 2.)));
-
-        return std::pair<int, int>(cosinus * k - sinus * h, sinus * k + cosinus * h);
-    };
-
-    // Transforms the coordinates to apply the rotation
-    auto tau = [](int delta, int k)
-    {
-        // The case in which delta = 0 is rather exceptional
-        if (delta == 0)
-        {
-            return k;
-        }
-        else
-        {
-            auto tmp = (1 << (delta - 1));
-            return static_cast<int>((k < tmp) ? (k - tmp) : (k - tmp + 1));
-        }
-    };
-
-    auto tau_inverse = [](int delta, int k)
-    {
-        if (delta == 0)
-        {
-            return k;
-        }
-        else
-        {
-            auto tmp = (1 << (delta - 1));
-            return static_cast<int>((k < 0) ? (k + tmp) : (k + tmp - 1));
-        }
-    };
-
-    for (std::size_t k = 0; k < max_level - min_level + 1; ++k)
-    {
-        int size = (1 << k);
-        data[k].resize(4);
-
-        for (int alpha = 0; alpha < 4; ++alpha)
-        {
-            for (int l = 0; l < size; ++l)
-            {
-                // The reference direction from which the other ones are
-                // computed is that of (1, 0)
-                auto rotated_in  = rotation_of_pi_over_two(alpha, tau(k, i * size - 1), tau(k, j * size + l));
-                auto rotated_out = rotation_of_pi_over_two(alpha, tau(k, (i + 1) * size - 1), tau(k, j * size + l));
-
-                // For the cells inside the domain, we can already combine
-                // entering and exiting fluxes and we have a compensation of
-                // many cells.
-                data[k][alpha] += (prediction(k, tau_inverse(k, rotated_in.first), tau_inverse(k, rotated_in.second))
-                                   - prediction(k, tau_inverse(k, rotated_out.first), tau_inverse(k, rotated_out.second)));
-            }
-        }
-    }
-    return data;
-}
-
-template <class Field, class Func, class pred>
+template <class Field>
 void one_time_step(Field& f,
-                   Func&& update_bc_for_level,
-                   const pred& pred_coeff,
                    const double lambda,
                    const double sq_rho,
                    const double sxy_rho,
@@ -273,193 +190,155 @@ void one_time_step(Field& f,
                    const double sq_e,
                    const double sxy_e)
 {
-    constexpr std::size_t nvel = Field::size;
-    using coord_index_t        = typename Field::interval_t::coord_index_t;
+    constexpr std::size_t dim = Field::dim;
+    auto& mesh                = f.mesh();
+    using mesh_id_t           = typename Field::mesh_t::mesh_id_t;
 
-    auto mesh       = f.mesh();
-    using mesh_id_t = typename decltype(mesh)::mesh_id_t;
-
-    auto min_level = mesh.min_level();
-    auto max_level = mesh.max_level();
+    std::size_t max_level = mesh.max_level();
 
-    samurai::update_ghost_mr(f, std::forward<Func>(update_bc_for_level));
-    samurai::update_overleaves_mr(f, std::forward<Func>(update_bc_for_level));
+    samurai::times::timers.start("ugm-step");
+    samurai::update_ghost_mr(f);
+    samurai::times::timers.stop("ugm-step");
 
+    samurai::times::timers.start("field-step");
     Field new_f{"new_f", mesh};
     new_f.array().fill(0.);
-    Field fluxes{"fluxes", mesh}; // This stored the fluxes computed at the level of the overleaves
-    fluxes.array().fill(0.);
     Field advected{"advected", mesh};
     advected.array().fill(0.);
+    samurai::times::timers.stop("field-step");
 
-    for (std::size_t level = min_level; level <= max_level; ++level)
-    {
-        auto leaves = samurai::intersection(mesh[mesh_id_t::cells][level], mesh[mesh_id_t::cells][level]);
-
-        if (level == max_level)
-        { // Advection at the finest level
-
-            leaves(
-                [&](auto& interval, auto& index)
-                {
-                    auto k = interval; // Logical index in x
-                    auto h = index[0]; // Logical index in y
-
-                    // We enforce a bounce-back
-                    for (int scheme_n = 0; scheme_n < 4; ++scheme_n)
-                    { // We have 4 schemes
-                        advected(0 + 4 * scheme_n, level, k, h) = f(0 + 4 * scheme_n, level, k - 1, h);
-                        advected(1 + 4 * scheme_n, level, k, h) = f(1 + 4 * scheme_n, level, k, h - 1);
-                        advected(2 + 4 * scheme_n, level, k, h) = f(2 + 4 * scheme_n, level, k + 1, h);
-                        advected(3 + 4 * scheme_n, level, k, h) = f(3 + 4 * scheme_n, level, k, h + 1);
-                    }
-                });
-        }
-        else // Advection at the coarse levels using the overleaves
+    samurai::times::timers.start("lbm-step");
+    samurai::for_each_interval(
+        mesh[mesh_id_t::cells],
+        [&](std::size_t level, auto& i, auto& index)
         {
-            auto lev_p_1  = level + 1;
-            std::size_t j = max_level - (lev_p_1);
-            double coeff  = 1. / (1 << (2 * j)); // ATTENTION A LA DIMENSION 2 !!!!
-
-            leaves.on(level + 1)([&](auto& interval, auto& index) { // This are overleaves
-                auto k = interval;                                  // Logical index in x
-                auto h = index[0];                                  // Logical index in y
-
-                for (int scheme_n = 0; scheme_n < 4; ++scheme_n)
-                {
-                    auto shift = 4 * scheme_n;
-
-                    for (std::size_t alpha = 0; alpha < 4; ++alpha)
-                    {
-                        for (auto& c : pred_coeff[j][alpha].coeff)
-                        {
-                            coord_index_t stencil_x, stencil_y;
-                            std::tie(stencil_x, stencil_y) = c.first;
-
-                            fluxes(alpha + shift, lev_p_1, k, h) += c.second * f(alpha + shift, lev_p_1, k + stencil_x, h + stencil_y);
-                        }
-                    }
-                }
-            });
+            auto j      = index[0];
+            auto jump   = max_level - level;
+            double coef = 1. / (1 << (dim * jump));
+            for (std::size_t scheme_n = 0; scheme_n < 4; ++scheme_n)
+            { // We have 4 schemes
+                advected(0 + 4 * scheme_n, level, i, j) = f(0 + 4 * scheme_n, level, i-1, j);
+                advected(1 + 4 * scheme_n, level, i, j) = f(1 + 4 * scheme_n, level, i, j-1);
+                advected(2 + 4 * scheme_n, level, i, j) = f(2 + 4 * scheme_n, level, i+1, j);
+                advected(3 + 4 * scheme_n, level, i, j) = f(3 + 4 * scheme_n, level, i, j+1);
+            
+            //     advected(
+            //         0 + 4 * scheme_n,
+            //         level,
+            //         i,
+            //         j) = f(0 + 4 * scheme_n, level, i, j)
+            //            + coef * samurai::portion(f, 0 + 4 * scheme_n, level, i - 1, j, jump, {(1 << jump) - 1, (1 << jump)}, {0, (1 << jump)})
+            //            - coef * samurai::portion(f, 0 + 4 * scheme_n, level, i, j, jump, {(1 << jump) - 1, (1 << jump)}, {0, (1 << jump)});
+
+            //     advected(
+            //         1 + 4 * scheme_n,
+            //         level,
+            //         i,
+            //         j) = f(1 + 4 * scheme_n, level, i, j)
+            //            + coef * samurai::portion(f, 1 + 4 * scheme_n, level, i, j - 1, jump, {0, (1 << jump)}, {(1 << jump) - 1, (1 << jump)})
+            //            - coef * samurai::portion(f, 1 + 4 * scheme_n, level, i, j, jump, {0, (1 << jump)}, {(1 << jump) - 1, (1 << jump)});
+
+            //     advected(2 + 4 * scheme_n, level, i, j) = f(
+            //         2 + 4 * scheme_n,
+            //         level,
+            //         i,
+            //         j) = f(2 + 4 * scheme_n, level, i, j)
+            //            + coef * samurai::portion(f, 2 + 4 * scheme_n, level, i + 1, j, jump, {0, 1}, {0, (1 << jump)})
+            //            - coef * samurai::portion(f, 2 + 4 * scheme_n, level, i, j, jump, {0, 1}, {0, (1 << jump)});
+            //     advected(3 + 4 * scheme_n,
+            //              level,
+            //              i,
+            //              j) = f(3 + 4 * scheme_n, level, i, j)
+            //                 + coef * samurai::portion(f, 3 + 4 * scheme_n, level, i, j + 1, jump, {0, (1 << jump)}, {0, 1})
+            //                 - coef * samurai::portion(f, 3 + 4 * scheme_n, level, i, j, jump, {0, (1 << jump)}, {0, 1});
+            }
 
-            leaves(
-                [&](auto& interval, auto& index)
-                {
-                    auto k = interval; // Logical index in x
-                    auto h = index[0]; // Logical index in y
-
-                    for (int alpha = 0; alpha < 16; ++alpha)
-                    {
-                        advected(alpha, level, k, h) = f(alpha, level, k, h)
-                                                     + coeff * 0.25
-                                                           * (fluxes(alpha, lev_p_1, 2 * k, 2 * h) + fluxes(alpha, lev_p_1, 2 * k + 1, 2 * h)
-                                                              + fluxes(alpha, lev_p_1, 2 * k, 2 * h + 1)
-                                                              + fluxes(alpha, lev_p_1, 2 * k + 1, 2 * h + 1));
-                    }
-                });
-        }
+            // We compute the advected momenti
+            auto m0_0 = xt::eval(advected(0, level, i, j) + advected(1, level, i, j) + advected(2, level, i, j) + advected(3, level, i, j));
+            auto m0_1 = xt::eval(lambda * (advected(0, level, i, j) - advected(2, level, i, j)));
+            auto m0_2 = xt::eval(lambda * (advected(1, level, i, j) - advected(3, level, i, j)));
+            auto m0_3 = xt::eval(
+                lambda * lambda * (advected(0, level, i, j) - advected(1, level, i, j) + advected(2, level, i, j) - advected(3, level, i, j)));
+
+            auto m1_0 = xt::eval(advected(4, level, i, j) + advected(5, level, i, j) + advected(6, level, i, j) + advected(7, level, i, j));
+            auto m1_1 = xt::eval(lambda * (advected(4, level, i, j) - advected(6, level, i, j)));
+            auto m1_2 = xt::eval(lambda * (advected(5, level, i, j) - advected(7, level, i, j)));
+            auto m1_3 = xt::eval(
+                lambda * lambda * (advected(4, level, i, j) - advected(5, level, i, j) + advected(6, level, i, j) - advected(7, level, i, j)));
+
+            auto m2_0 = xt::eval(advected(8, level, i, j) + advected(9, level, i, j) + advected(10, level, i, j) + advected(11, level, i, j));
+            auto m2_1 = xt::eval(lambda * (advected(8, level, i, j) - advected(10, level, i, j)));
+            auto m2_2 = xt::eval(lambda * (advected(9, level, i, j) - advected(11, level, i, j)));
+            auto m2_3 = xt::eval(
+                lambda * lambda
+                * (advected(8, level, i, j) - advected(9, level, i, j) + advected(10, level, i, j) - advected(11, level, i, j)));
+
+            auto m3_0 = xt::eval(advected(12, level, i, j) + advected(13, level, i, j) + advected(14, level, i, j)
+                                 + advected(15, level, i, j));
+            auto m3_1 = xt::eval(lambda * (advected(12, level, i, j) - advected(14, level, i, j)));
+            auto m3_2 = xt::eval(lambda * (advected(13, level, i, j) - advected(15, level, i, j)));
+            auto m3_3 = xt::eval(
+                lambda * lambda
+                * (advected(12, level, i, j) - advected(13, level, i, j) + advected(14, level, i, j) - advected(15, level, i, j)));
+
+            m0_1 = (1 - sq_rho) * m0_1 + sq_rho * (m1_0);
+            m0_2 = (1 - sq_rho) * m0_2 + sq_rho * (m2_0);
+            m0_3 = (1 - sxy_rho) * m0_3;
+
+            m1_1 = (1 - sq_q) * m1_1
+                 + sq_q * ((3. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (1. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (gm - 1.) * m3_0);
+            m1_2 = (1 - sq_q) * m1_2 + sq_q * (m1_0 * m2_0 / m0_0);
+            m1_3 = (1 - sxy_q) * m1_3;
+
+            m2_1 = (1 - sq_q) * m2_1 + sq_q * (m1_0 * m2_0 / m0_0);
+            m2_2 = (1 - sq_q) * m2_2
+                 + sq_q * ((3. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (1. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (gm - 1.) * m3_0);
+            m2_3 = (1 - sxy_q) * m2_3;
+
+            m3_1 = (1 - sq_e) * m3_1
+                 + sq_e
+                       * (gm * (m1_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m1_0 * m1_0 * m1_0) / (m0_0 * m0_0)
+                          - (gm / 2. - 1. / 2.) * (m1_0 * m2_0 * m2_0) / (m0_0 * m0_0));
+            m3_2 = (1 - sq_e) * m3_2
+                 + sq_e
+                       * (gm * (m2_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m2_0 * m2_0 * m2_0) / (m0_0 * m0_0)
+                          - (gm / 2. - 1. / 2.) * (m2_0 * m1_0 * m1_0) / (m0_0 * m0_0));
+            m3_3 = (1 - sxy_e) * m3_3;
+
+            new_f(0, level, i, j) = .25 * m0_0 + .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3;
+            new_f(1, level, i, j) = .25 * m0_0 + .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3;
+            new_f(2, level, i, j) = .25 * m0_0 - .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3;
+            new_f(3, level, i, j) = .25 * m0_0 - .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3;
+
+            new_f(4, level, i, j) = .25 * m1_0 + .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3;
+            new_f(5, level, i, j) = .25 * m1_0 + .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3;
+            new_f(6, level, i, j) = .25 * m1_0 - .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3;
+            new_f(7, level, i, j) = .25 * m1_0 - .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3;
+
+            new_f(8, level, i, j)  = .25 * m2_0 + .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3;
+            new_f(9, level, i, j)  = .25 * m2_0 + .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3;
+            new_f(10, level, i, j) = .25 * m2_0 - .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3;
+            new_f(11, level, i, j) = .25 * m2_0 - .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3;
+
+            new_f(12, level, i, j) = .25 * m3_0 + .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3;
+            new_f(13, level, i, j) = .25 * m3_0 + .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3;
+            new_f(14, level, i, j) = .25 * m3_0 - .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3;
+            new_f(15, level, i, j) = .25 * m3_0 - .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3;
+        });
+    
+    samurai::times::timers.stop("lbm-step");
 
-        leaves(
-            [&](auto& interval, auto& index)
-            {
-                auto k = interval; // Logical index in x
-                auto h = index[0]; // Logical index in y
-
-                // We compute the advected momenti
-                auto m0_0 = xt::eval(advected(0, level, k, h) + advected(1, level, k, h) + advected(2, level, k, h)
-                                     + advected(3, level, k, h));
-                auto m0_1 = xt::eval(lambda * (advected(0, level, k, h) - advected(2, level, k, h)));
-                auto m0_2 = xt::eval(lambda * (advected(1, level, k, h) - advected(3, level, k, h)));
-                auto m0_3 = xt::eval(
-                    lambda * lambda
-                    * (advected(0, level, k, h) - advected(1, level, k, h) + advected(2, level, k, h) - advected(3, level, k, h)));
-
-                auto m1_0 = xt::eval(advected(4, level, k, h) + advected(5, level, k, h) + advected(6, level, k, h)
-                                     + advected(7, level, k, h));
-                auto m1_1 = xt::eval(lambda * (advected(4, level, k, h) - advected(6, level, k, h)));
-                auto m1_2 = xt::eval(lambda * (advected(5, level, k, h) - advected(7, level, k, h)));
-                auto m1_3 = xt::eval(
-                    lambda * lambda
-                    * (advected(4, level, k, h) - advected(5, level, k, h) + advected(6, level, k, h) - advected(7, level, k, h)));
-
-                auto m2_0 = xt::eval(advected(8, level, k, h) + advected(9, level, k, h) + advected(10, level, k, h)
-                                     + advected(11, level, k, h));
-                auto m2_1 = xt::eval(lambda * (advected(8, level, k, h) - advected(10, level, k, h)));
-                auto m2_2 = xt::eval(lambda * (advected(9, level, k, h) - advected(11, level, k, h)));
-                auto m2_3 = xt::eval(
-                    lambda * lambda
-                    * (advected(8, level, k, h) - advected(9, level, k, h) + advected(10, level, k, h) - advected(11, level, k, h)));
-
-                auto m3_0 = xt::eval(advected(12, level, k, h) + advected(13, level, k, h) + advected(14, level, k, h)
-                                     + advected(15, level, k, h));
-                auto m3_1 = xt::eval(lambda * (advected(12, level, k, h) - advected(14, level, k, h)));
-                auto m3_2 = xt::eval(lambda * (advected(13, level, k, h) - advected(15, level, k, h)));
-                auto m3_3 = xt::eval(
-                    lambda * lambda
-                    * (advected(12, level, k, h) - advected(13, level, k, h) + advected(14, level, k, h) - advected(15, level, k, h)));
-
-                m0_1 = (1 - sq_rho) * m0_1 + sq_rho * (m1_0);
-                m0_2 = (1 - sq_rho) * m0_2 + sq_rho * (m2_0);
-                m0_3 = (1 - sxy_rho) * m0_3;
-
-                m1_1 = (1 - sq_q) * m1_1
-                     + sq_q * ((3. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (1. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (gm - 1.) * m3_0);
-                m1_2 = (1 - sq_q) * m1_2 + sq_q * (m1_0 * m2_0 / m0_0);
-                m1_3 = (1 - sxy_q) * m1_3;
-
-                m2_1 = (1 - sq_q) * m2_1 + sq_q * (m1_0 * m2_0 / m0_0);
-                m2_2 = (1 - sq_q) * m2_2
-                     + sq_q * ((3. / 2. - gm / 2.) * (m2_0 * m2_0) / (m0_0) + (1. / 2. - gm / 2.) * (m1_0 * m1_0) / (m0_0) + (gm - 1.) * m3_0);
-                m2_3 = (1 - sxy_q) * m2_3;
-
-                m3_1 = (1 - sq_e) * m3_1
-                     + sq_e
-                           * (gm * (m1_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m1_0 * m1_0 * m1_0) / (m0_0 * m0_0)
-                              - (gm / 2. - 1. / 2.) * (m1_0 * m2_0 * m2_0) / (m0_0 * m0_0));
-                m3_2 = (1 - sq_e) * m3_2
-                     + sq_e
-                           * (gm * (m2_0 * m3_0) / (m0_0) - (gm / 2. - 1. / 2.) * (m2_0 * m2_0 * m2_0) / (m0_0 * m0_0)
-                              - (gm / 2. - 1. / 2.) * (m2_0 * m1_0 * m1_0) / (m0_0 * m0_0));
-                m3_3 = (1 - sxy_e) * m3_3;
-
-                new_f(0, level, k, h) = .25 * m0_0 + .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3;
-                new_f(1, level, k, h) = .25 * m0_0 + .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3;
-                new_f(2, level, k, h) = .25 * m0_0 - .5 / lambda * (m0_1) + .25 / (lambda * lambda) * m0_3;
-                new_f(3, level, k, h) = .25 * m0_0 - .5 / lambda * (m0_2)-.25 / (lambda * lambda) * m0_3;
-
-                new_f(4, level, k, h) = .25 * m1_0 + .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3;
-                new_f(5, level, k, h) = .25 * m1_0 + .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3;
-                new_f(6, level, k, h) = .25 * m1_0 - .5 / lambda * (m1_1) + .25 / (lambda * lambda) * m1_3;
-                new_f(7, level, k, h) = .25 * m1_0 - .5 / lambda * (m1_2)-.25 / (lambda * lambda) * m1_3;
-
-                new_f(8, level, k, h)  = .25 * m2_0 + .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3;
-                new_f(9, level, k, h)  = .25 * m2_0 + .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3;
-                new_f(10, level, k, h) = .25 * m2_0 - .5 / lambda * (m2_1) + .25 / (lambda * lambda) * m2_3;
-                new_f(11, level, k, h) = .25 * m2_0 - .5 / lambda * (m2_2)-.25 / (lambda * lambda) * m2_3;
-
-                new_f(12, level, k, h) = .25 * m3_0 + .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3;
-                new_f(13, level, k, h) = .25 * m3_0 + .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3;
-                new_f(14, level, k, h) = .25 * m3_0 - .5 / lambda * (m3_1) + .25 / (lambda * lambda) * m3_3;
-                new_f(15, level, k, h) = .25 * m3_0 - .5 / lambda * (m3_2)-.25 / (lambda * lambda) * m3_3;
-            });
-    }
     std::swap(f.array(), new_f.array());
 }
 
 template <class Field>
-void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "")
+void save_solution(Field& f, double eps, std::size_t ite, std::size_t freq_out, std::string ext = "")
 {
     using value_t = typename Field::value_type;
 
-    auto mesh       = f.mesh();
-    using mesh_id_t = typename decltype(mesh)::mesh_id_t;
-
-    std::size_t min_level = mesh.min_level();
-    std::size_t max_level = mesh.max_level();
-
-    std::stringstream str;
-    str << "LBM_D2Q4_3_Euler_" << ext << "_lmin_" << min_level << "_lmax-" << max_level << "_eps-" << eps << "_ite-" << ite;
+    if( ite % freq_out != 0 ) return; 
 
+    auto& mesh = f.mesh();
     auto level = samurai::make_field<std::size_t, 1>("level", mesh);
     auto rho   = samurai::make_field<value_t, 1>("rho", mesh);
     auto qx    = samurai::make_field<value_t, 1>("qx", mesh);
@@ -467,7 +346,7 @@ void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "")
     auto e     = samurai::make_field<value_t, 1>("e", mesh);
     auto s     = samurai::make_field<value_t, 1>("entropy", mesh);
 
-    samurai::for_each_cell(mesh[mesh_id_t::cells],
+    samurai::for_each_cell(mesh,
                            [&](auto& cell)
                            {
                                level[cell] = cell.level;
@@ -482,449 +361,119 @@ void save_solution(Field& f, double eps, std::size_t ite, std::string ext = "")
                                s[cell] = std::log(p / std::pow(rho[cell], gm));
                            });
 
-    samurai::save(str.str().data(), mesh, rho, qx, qy, e, s, f, level);
+    std::size_t min_level = mesh.min_level();
+    std::size_t max_level = mesh.max_level();
+    std::string filename  = fmt::format("LBM_D2Q4_3_Euler_{}_lmin-{}_lmax-{}_eps-{}_ite-{}", ext, min_level, max_level, eps, ite);
+    samurai::save(filename, mesh, rho, qx, qy, e, s, f, level);
 }
 
-// Attention : the number 2 as second template parameter does not mean
-// that we are dealing with two fields!!!!
-template <class Field, class interval_t, class ordinates_t, class ordinates_t_bis>
-xt::xtensor<double, 2>
-prediction_all(const Field& f,
-               std::size_t level_g,
-               std::size_t level,
-               const interval_t& k,
-               const ordinates_t& h,
-               std::map<std::tuple<std::size_t, std::size_t, interval_t, ordinates_t_bis>, xt::xtensor<double, 2>>& mem_map)
+int main(int argc, char* argv[])
 {
-    // That is used to employ _ with xtensor
-    using namespace xt::placeholders;
-
-    // mem_map.clear(); // To be activated if we want to avoid memoization
-    auto it = mem_map.find({level_g, level, k, h});
-
-    if (it != mem_map.end() && k.size() == (std::get<2>(it->first)).size())
-    {
-        return it->second;
-    }
-    else
-    {
-        auto mesh       = f.mesh();
-        using mesh_id_t = typename decltype(mesh)::mesh_id_t;
-
-        // We put only the size in x (k.size()) because in y we only have slices
-        // of size 1. The second term (1) should be adapted according to the
-        // number of fields that we have.
-        std::vector<std::size_t> shape_x = {k.size(), 16};
-        xt::xtensor<double, 2> out       = xt::empty<double>(shape_x);
-        auto mask                        = mesh.exists(mesh_id_t::cells_and_ghosts,
-                                level_g + level,
-                                k,
-                                h); // Check if we are on a leaf or a ghost (CHECK IF IT IS OK)
-        xt::xtensor<double, 2> mask_all  = xt::empty<double>(shape_x);
-
-        for (int h_field = 0; h_field < 16; ++h_field)
-        {
-            xt::view(mask_all, xt::all(), h_field) = mask;
-        }
-
-        if (xt::all(mask)) // Recursion finished
-        {
-            return xt::eval(f(0, 16, level_g + level, k, h));
-        }
+    samurai::initialize(argc, argv);
 
-        // If we cannot stop here
-        auto kg                    = k >> 1;
-        kg.step                    = 1;
-        xt::xtensor<double, 2> val = xt::empty<double>(shape_x);
-        /*
-        --------------------
-        NW   |   N   |   NE
-        --------------------
-         W   | EARTH |   E
-        --------------------
-        SW   |   S   |   SE
-        --------------------
-        */
-
-        auto earth = xt::eval(prediction_all(f, level_g, level - 1, kg, (h >> 1), mem_map));
-        auto W     = xt::eval(prediction_all(f, level_g, level - 1, kg - 1, (h >> 1), mem_map));
-        auto E     = xt::eval(prediction_all(f, level_g, level - 1, kg + 1, (h >> 1), mem_map));
-        auto S     = xt::eval(prediction_all(f, level_g, level - 1, kg, (h >> 1) - 1, mem_map));
-        auto N     = xt::eval(prediction_all(f, level_g, level - 1, kg, (h >> 1) + 1, mem_map));
-        auto SW    = xt::eval(prediction_all(f, level_g, level - 1, kg - 1, (h >> 1) - 1, mem_map));
-        auto SE    = xt::eval(prediction_all(f, level_g, level - 1, kg + 1, (h >> 1) - 1, mem_map));
-        auto NW    = xt::eval(prediction_all(f, level_g, level - 1, kg - 1, (h >> 1) + 1, mem_map));
-        auto NE    = xt::eval(prediction_all(f, level_g, level - 1, kg + 1, (h >> 1) + 1, mem_map));
-
-        // This is to deal with odd/even indices in the x direction
-        std::size_t start_even = (k.start & 1) ? 1 : 0;
-        std::size_t start_odd  = (k.start & 1) ? 0 : 1;
-        std::size_t end_even   = (k.end & 1) ? kg.size() : kg.size() - 1;
-        std::size_t end_odd    = (k.end & 1) ? kg.size() - 1 : kg.size();
-
-        int delta_y    = (h & 1) ? 1 : 0;
-        int m1_delta_y = (delta_y == 0) ? 1 : -1; // (-1)^(delta_y)
-
-        xt::view(val, xt::range(start_even, _, 2)) = xt::view(
-            earth + 1. / 8 * (W - E) + 1. / 8 * m1_delta_y * (S - N) - 1. / 64 * m1_delta_y * (NE - NW - SE + SW),
-            xt::range(start_even, _));
-        xt::view(val, xt::range(start_odd, _, 2)) = xt::view(
-            earth - 1. / 8 * (W - E) + 1. / 8 * m1_delta_y * (S - N) + 1. / 64 * m1_delta_y * (NE - NW - SE + SW),
-            xt::range(_, end_odd));
-
-        xt::masked_view(out, !mask_all) = xt::masked_view(val, !mask_all);
-
-        for (int k_mask = 0, k_int = k.start; k_int < k.end; ++k_mask, ++k_int)
-        {
-            if (mask[k_mask])
-            {
-                xt::view(out, k_mask) = xt::view(f(0, 16, level_g + level, {k_int, k_int + 1}, h), 0);
-            }
-        }
+    CLI::App app{"Multi resolution for a D2Q4 LBM scheme for the scalar advection equation"};
 
-        // It is crucial to use insert and not [] in order not to update the
-        // value in case of duplicated (same key)
-        mem_map.insert(std::make_pair(std::tuple<std::size_t, std::size_t, interval_t, ordinates_t_bis>{level_g, level, k, h}, out));
-        return out;
-    }
-}
+    std::size_t freq_out = 1; // frequency for output in iteration
+    std::size_t total_nb_ite = 100;
+    int configuration        = 12;
 
-template <class Field, class FieldFull, class Func>
-double compute_error(Field& f, FieldFull& f_full, Func&& update_bc_for_level)
-{
-    constexpr std::size_t size = Field::size;
-    using value_t              = typename Field::value_type;
+    // Multiresolution parameters
+    std::size_t min_level = 2;
+    std::size_t max_level = 9;
+    double mr_epsilon     = 1.e-3; // Threshold used by multiresolution
+    double mr_regularity  = 2.;    // Regularity guess for multiresolution
 
-    auto mesh       = f.mesh();
-    using mesh_id_t = typename decltype(mesh)::mesh_id_t;
+    std::size_t nt_loadbalance = 10;
 
-    auto min_level = mesh.min_level();
-    auto max_level = mesh.max_level();
+    app.add_option("--nb-ite", total_nb_ite, "number of iteration")->capture_default_str();
+    app.add_option("--config", configuration, "Lax-Liu configuration")->capture_default_str();
+    app.add_option("--min-level", min_level, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--max-level", max_level, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--nt-loadbalance", nt_loadbalance, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh")->capture_default_str()
+                   ->group("Multiresolution");
+    app.add_option("--mr-reg", mr_regularity, "The regularity criteria used by the multiresolution to adapt the mesh")
+                   ->capture_default_str()->group("Multiresolution");
+    CLI11_PARSE(app, argc, argv);
 
-    auto init_mesh = f_full.mesh();
+    constexpr size_t dim = 2;
+    using Config         = samurai::MRConfig<dim, 2, 2>;
 
-    samurai::update_ghost_mr(f, std::forward<Func>(update_bc_for_level));
+    double lambda = 1. / 0.2; // This seems to work
+    double T      = 0.25;     // 0.3;//1.2;
 
-    auto f_reconstructed = samurai::make_field<value_t, size>("f_reconstructed", init_mesh);
-    f_reconstructed.fill(0.);
+    double sq_rho  = 1.9;
+    double sxy_rho = 1.;
 
-    // For memoization
-    using interval_t  = typename Field::interval_t;   // Type in X
-    using ordinates_t = typename interval_t::index_t; // Type in Y
-    std::map<std::tuple<std::size_t, std::size_t, interval_t, ordinates_t>, xt::xtensor<double, 2>> memoization_map;
-    memoization_map.clear();
+    double sq_q  = 1.75;
+    double sxy_q = 1.;
 
-    double error = 0.;
-    double norm  = 0.;
-    double dx    = 1. / (1 << max_level);
+    double sq_e  = 1.75;
+    double sxy_e = 1.;
 
-    for (std::size_t level = 0; level <= max_level; ++level)
+    if (configuration == 12)
     {
-        auto leaves_on_finest = samurai::intersection(mesh[mesh_id_t::cells][level], mesh[mesh_id_t::cells][level]);
-
-        leaves_on_finest.on(max_level)(
-            [&](auto& interval, auto& index)
-            {
-                auto k = interval;
-                auto h = index[0];
-
-                f_reconstructed(max_level, k, h) = prediction_all(f, level, max_level - level, k, h, memoization_map);
-
-                auto rho_reconstructed = f_reconstructed(0, max_level, k, h) + f_reconstructed(1, max_level, k, h)
-                                       + f_reconstructed(2, max_level, k, h) + f_reconstructed(3, max_level, k, h);
-                auto rho_full = f_full(0, max_level, k, h) + f_full(1, max_level, k, h) + f_full(2, max_level, k, h)
-                              + f_full(3, max_level, k, h);
-
-                error += xt::sum(xt::abs(rho_reconstructed - rho_full))[0];
-                norm += xt::sum(xt::abs(rho_full))[0];
-            });
+        T = .25;
     }
-    return (error / norm);
-}
-
-template <class Field, class FieldFull, class Func>
-void save_reconstructed(Field& f, FieldFull& f_full, Func&& update_bc_for_level, double eps, std::size_t ite, std::string ext = "")
-{
-    constexpr std::size_t size = Field::size;
-    using value_t              = typename Field::value_type;
-
-    auto mesh       = f.mesh();
-    using mesh_id_t = typename decltype(mesh)::mesh_id_t;
-
-    auto min_level = mesh.min_level();
-    auto max_level = mesh.max_level();
-
-    auto init_mesh = f_full.mesh();
-
-    samurai::update_ghost_mr(f, std::forward<Func>(update_bc_for_level));
-
-    auto f_reconstructed = samurai::make_field<value_t, size>("f_reconstructed", init_mesh); // To reconstruct all and
-                                                                                             // see entropy
-    f_reconstructed.fill(0.);
-
-    auto rho_reconstructed = samurai::make_field<value_t, 1>("rho_reconstructed", init_mesh);
-    auto qx_reconstructed  = samurai::make_field<value_t, 1>("qx_reconstructed", init_mesh);
-    auto qy_reconstructed  = samurai::make_field<value_t, 1>("qy_reconstructed", init_mesh);
-    auto E_reconstructed   = samurai::make_field<value_t, 1>("E_reconstructed", init_mesh);
-    auto s_reconstructed   = samurai::make_field<value_t, 1>("s_reconstructed", init_mesh);
-    auto level_            = samurai::make_field<std::size_t, 1>("level", init_mesh);
-
-    auto rho = samurai::make_field<value_t, 1>("rho", init_mesh);
-    auto qx  = samurai::make_field<value_t, 1>("qx", init_mesh);
-    auto qy  = samurai::make_field<value_t, 1>("qy", init_mesh);
-    auto E   = samurai::make_field<value_t, 1>("E", init_mesh);
-    auto s   = samurai::make_field<value_t, 1>("s", init_mesh);
-
-    // For memoization
-    using interval_t  = typename Field::interval_t;   // Type in X
-    using ordinates_t = typename interval_t::index_t; // Type in Y
-    std::map<std::tuple<std::size_t, std::size_t, interval_t, ordinates_t>, xt::xtensor<double, 2>> memoization_map;
-
-    memoization_map.clear();
-
-    for (std::size_t level = 0; level <= max_level; ++level)
+    else
     {
-        auto number_leaves = mesh.nb_cells(level, mesh_id_t::cells);
-
-        auto leaves_on_finest = samurai::intersection(mesh[mesh_id_t::cells][level], mesh[mesh_id_t::cells][level]);
-
-        leaves_on_finest.on(max_level)(
-            [&](auto& interval, auto& index)
-            {
-                auto k = interval;
-                auto h = index[0];
-
-                f_reconstructed(max_level, k, h) = prediction_all(f, level, max_level - level, k, h, memoization_map);
-
-                level_(max_level, k, h) = level;
-
-                rho_reconstructed(max_level, k, h) = f_reconstructed(0, max_level, k, h) + f_reconstructed(1, max_level, k, h)
-                                                   + f_reconstructed(2, max_level, k, h) + f_reconstructed(3, max_level, k, h);
-
-                qx_reconstructed(max_level, k, h) = f_reconstructed(4, max_level, k, h) + f_reconstructed(5, max_level, k, h)
-                                                  + f_reconstructed(6, max_level, k, h) + f_reconstructed(7, max_level, k, h);
+        T = .3;
+        // T = 0.1;
+    }
 
-                qy_reconstructed(max_level, k, h) = f_reconstructed(8, max_level, k, h) + f_reconstructed(9, max_level, k, h)
-                                                  + f_reconstructed(10, max_level, k, h) + f_reconstructed(11, max_level, k, h);
+    samurai::Box<double, dim> box({0, 0}, {1, 1});
+    samurai::MRMesh<Config> mesh(box, min_level, max_level);
 
-                E_reconstructed(max_level, k, h) = f_reconstructed(12, max_level, k, h) + f_reconstructed(13, max_level, k, h)
-                                                 + f_reconstructed(14, max_level, k, h) + f_reconstructed(15, max_level, k, h);
+    // Initialization
+    auto f = init_f(mesh, configuration, lambda); // Adaptive  scheme
+    samurai::make_bc<samurai::Neumann<1>>(f, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.);
 
-                s_reconstructed(max_level, k, h) = xt::log(
-                    ((gm - 1.)
-                     * (E_reconstructed(max_level, k, h)
-                        - .5 * (xt::pow(qx_reconstructed(max_level, k, h), 2.) + xt::pow(qy_reconstructed(max_level, k, h), 2.))
-                              / rho_reconstructed(max_level, k, h)))
-                    / xt::pow(rho_reconstructed(max_level, k, h), gm));
+    double dx = 1.0 / (1 << max_level);
+    double dt = dx / lambda;
 
-                rho(max_level, k, h) = f_full(0, max_level, k, h) + f_full(1, max_level, k, h) + f_full(2, max_level, k, h)
-                                     + f_full(3, max_level, k, h);
+    std::size_t N = static_cast<std::size_t>(T / dt);
 
-                qx(max_level, k, h) = f_full(4, max_level, k, h) + f_full(5, max_level, k, h) + f_full(6, max_level, k, h)
-                                    + f_full(7, max_level, k, h);
+    // SFC_LoadBalancer_interval<dim, Hilbert> balancer;
+    // SFC_LoadBalancer_interval<dim, Morton> balancer;
+    // Load_balancing::Life balancer;
+    // Void_LoadBalancer<dim> balancer;
+    Diffusion_LoadBalancer_cell<dim> balancer;
+    // Diffusion_LoadBalancer_interval<dim> balancer;
+    // Load_balancing::Diffusion balancer;
 
-                qy(max_level, k, h) = f_full(8, max_level, k, h) + f_full(9, max_level, k, h) + f_full(10, max_level, k, h)
-                                    + f_full(11, max_level, k, h);
 
-                E(max_level, k, h) = f_full(12, max_level, k, h) + f_full(13, max_level, k, h) + f_full(14, max_level, k, h)
-                                   + f_full(15, max_level, k, h);
 
-                s(max_level, k, h) = xt::log(
-                    ((gm - 1.)
-                     * (E(max_level, k, h) - .5 * (xt::pow(qx(max_level, k, h), 2.) + xt::pow(qy(max_level, k, h), 2.)) / rho(max_level, k, h)))
-                    / xt::pow(rho(max_level, k, h), gm));
-            });
-    }
-
-    std::stringstream str;
-    str << "LBM_D2Q4_3_Euler_Reconstruction_" << ext << "_lmin_" << min_level << "_lmax-" << max_level << "_eps-" << eps << "_ite-" << ite;
-
-    samurai::save(str.str().data(),
-                  init_mesh,
-                  rho_reconstructed,
-                  qx_reconstructed,
-                  qy_reconstructed,
-                  E_reconstructed,
-                  s_reconstructed,
-                  rho,
-                  qx,
-                  qy,
-                  E,
-                  s,
-                  level_);
-}
+    auto MRadaptation = samurai::make_MRAdapt(f);
 
-int main(int argc, char* argv[])
-{
-    samurai::initialize(argc, argv);
-
-    cxxopts::Options options("lbm_d2q4_3_Euler",
-                             "Multi resolution for a D2Q4 LBM scheme for the "
-                             "scalar advection equation");
-
-    options.add_options()("min_level", "minimum level", cxxopts::value<std::size_t>()->default_value("2"))(
-        "max_level",
-        "maximum level",
-        cxxopts::value<std::size_t>()->default_value("7"))("epsilon", "maximum level", cxxopts::value<double>()->default_value("0.0001"))(
-        "ite",
-        "number of iteration",
-        cxxopts::value<std::size_t>()->default_value("100"))("reg", "regularity", cxxopts::value<double>()->default_value("0."))(
-        "config",
-        "Lax-Liu configuration",
-        cxxopts::value<int>()->default_value("12"))("h, help", "Help");
-
-    try
+    double t = 0.;
+    samurai::times::timers.start("tloop");
+    for (std::size_t nt = 0; nt <= N && nt < total_nb_ite; ++nt)
     {
-        auto result = options.parse(argc, argv);
+        std::cout << fmt::format("\n\t> Iteration {}, t: {}, dt: {} ", nt, t, dt ) << std::endl;
 
-        if (result.count("help"))
+        if (nt % nt_loadbalance == 0 && nt > 1)
         {
-            std::cout << options.help() << "\n";
+            samurai::times::timers.start("tloop.lb");
+            balancer.load_balance(mesh, f);
+            samurai::times::timers.stop("tloop.lb");
         }
-        else
-        {
-            constexpr size_t dim = 2;
-            using Config         = samurai::MROConfig<dim, 2>;
-
-            std::size_t min_level    = result["min_level"].as<std::size_t>();
-            std::size_t max_level    = result["max_level"].as<std::size_t>();
-            std::size_t total_nb_ite = result["ite"].as<std::size_t>();
-            double eps               = result["epsilon"].as<double>();
-            double regularity        = result["reg"].as<double>();
-            int configuration        = result["config"].as<int>();
-
-            // double lambda = 1./0.3; //4.0;
-            // double lambda = 1./0.2499; //4.0;
-            double lambda = 1. / 0.2; // This seems to work
-            double T      = 0.25;     // 0.3;//1.2;
-
-            double sq_rho  = 1.9;
-            double sxy_rho = 1.;
-
-            double sq_q  = 1.75;
-            double sxy_q = 1.;
 
-            double sq_e  = 1.75;
-            double sxy_e = 1.;
+        samurai::times::timers.start("tloop.MRAdaptation");
+        MRadaptation(mr_epsilon, mr_regularity);
+        samurai::times::timers.stop("tloop.MRAdaptation");
 
-            if (configuration == 12)
-            {
-                T = .25;
-            }
-            else
-            {
-                T = .3;
-                // T = 0.1;
-            }
-
-            // // This were the old test case (version 3)
-            // double sq = 1.75;
-            // double sxy = 2.;
-            // if (configuration == 12)    {
-            //     sxy = 1.5;
-            //     T = 0.25;
-            // }
-            // else    {
-            //     sxy = 0.5;
-            //     T = 0.3;
-            // }
-
-            samurai::Box<double, dim> box({0, 0}, {1, 1});
-            samurai::MROMesh<Config> mesh(box, min_level, max_level);
-            using mesh_id_t = typename samurai::MROMesh<Config>::mesh_id_t;
-            samurai::MROMesh<Config> mesh_ref{box, max_level, max_level};
-
-            using coord_index_t = typename samurai::MROMesh<Config>::coord_index_t;
-            auto pred_coeff     = compute_prediction<coord_index_t>(min_level, max_level);
-
-            // Initialization
-            auto f     = init_f(mesh, configuration, lambda);     // Adaptive  scheme
-            auto f_ref = init_f(mesh_ref, configuration, lambda); // Reference scheme
-
-            double dx = 1.0 / (1 << max_level);
-            double dt = dx / lambda;
-
-            std::size_t N = static_cast<std::size_t>(T / dt);
-
-            std::string dirname("./LaxLiu/");
-            std::string suffix("_Config_" + std::to_string(configuration) + "_min_" + std::to_string(min_level) + "_max_"
-                               + std::to_string(max_level) + "_eps_" + std::to_string(eps));
-
-            // std::ofstream stream_number_leaves;
-            // stream_number_leaves.open
-            // (dirname+"number_leaves"+suffix+".dat");
-
-            // std::ofstream stream_number_cells;
-            // stream_number_cells.open (dirname+"number_cells"+suffix+".dat");
-
-            // std::ofstream stream_time_scheme_ref;
-            // stream_time_scheme_ref.open
-            // (dirname+"time_scheme_ref"+suffix+".dat");
-
-            // std::ofstream stream_number_leaves_ref;
-            // stream_number_leaves_ref.open
-            // (dirname+"number_leaves_ref"+suffix+".dat");
-
-            // std::ofstream stream_number_cells_ref;
-            // stream_number_cells_ref.open
-            // (dirname+"number_cells_ref"+suffix+".dat");
-
-            int howoften = 1; // How often is the solution saved ?
-
-            auto update_bc_for_level = [](auto& field, std::size_t level)
-            {
-                update_bc_D2Q4_3_Euler_constant_extension(field, level);
-            };
+        samurai::times::timers.start("tloop.LBM");
+        one_time_step(f, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e);
+        samurai::times::timers.stop("tloop.LBM");
 
-            auto MRadaptation = samurai::make_MRAdapt(f, update_bc_for_level);
+        samurai::times::timers.start("tloop.io");
+        save_solution(f, mr_epsilon, nt, freq_out);
+        samurai::times::timers.stop("tloop.io");
 
-            for (std::size_t nb_ite = 0; nb_ite <= N; ++nb_ite)
-            {
-                std::cout << std::endl << "   Iteration number = " << nb_ite << std::endl;
-
-                if (max_level > min_level)
-                {
-                    MRadaptation(eps, regularity);
-                }
-
-                if (nb_ite == N)
-                {
-                    auto error_density = compute_error(f, f_ref, update_bc_for_level);
-                    std::cout << std::endl << "#### Epsilon = " << eps << "   error = " << error_density << std::endl;
-                    save_solution(f, eps, nb_ite, std::string("final_"));
-                    save_reconstructed(f, f_ref, update_bc_for_level, eps, nb_ite);
-                }
-
-                // if (nb_ite % howoften == 0)    {
-                //     save_solution(f    , eps, nb_ite/howoften,
-                //     std::string("Config_")+std::to_string(configuration)); //
-                //     Before applying the scheme
-                // }
-
-                one_time_step(f, update_bc_for_level, pred_coeff, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e);
-                one_time_step(f_ref, update_bc_for_level, pred_coeff, lambda, sq_rho, sxy_rho, sq_q, sxy_q, sq_e, sxy_e);
-
-                auto number_leaves = mesh.nb_cells(mesh_id_t::cells);
-                auto number_cells  = mesh.nb_cells();
-
-                samurai::statistics("D2Q4444_Euler_Lax_Liu", mesh);
-                // stream_number_leaves<<number_leaves<<std::endl;
-                // stream_number_cells<<number_cells<<std::endl;
-
-                // stream_number_leaves_ref<<mesh_ref.nb_cells(mesh_id_t::cells)<<std::endl;
-                // stream_number_cells_ref<<mesh_ref.nb_cells()<<std::endl;
-            }
-            // stream_number_leaves.close();
-            // stream_number_cells.close();
-
-            // stream_number_leaves_ref.close();
-            // stream_number_cells_ref.close();
-        }
-    }
-    catch (const cxxopts::OptionException& e)
-    {
-        std::cout << options.help() << "\n";
+        t += dt;
     }
+    samurai::times::timers.stop("tloop");
+
     samurai::finalize();
     return 0;
-}
+}
\ No newline at end of file
diff --git a/demos/loadbalancing/CMakeLists.txt b/demos/loadbalancing/CMakeLists.txt
new file mode 100644
index 000000000..1d4f0b25c
--- /dev/null
+++ b/demos/loadbalancing/CMakeLists.txt
@@ -0,0 +1,7 @@
+include(FindPkgConfig)
+
+# add_executable(test-simple-2d partition.cpp)
+# target_link_libraries(test-simple-2d samurai CLI11::CLI11)
+
+add_executable(test-circle-2d circle.cpp)
+target_link_libraries(test-circle-2d samurai CLI11::CLI11 ${MPI_LIBRARIES})
\ No newline at end of file
diff --git a/demos/loadbalancing/circle.cpp b/demos/loadbalancing/circle.cpp
new file mode 100644
index 000000000..e27b9c8b6
--- /dev/null
+++ b/demos/loadbalancing/circle.cpp
@@ -0,0 +1,312 @@
+#include <iostream>
+
+// CLI
+#include <CLI/CLI.hpp>
+
+// samurai 
+#include <samurai/samurai.hpp>
+#include <samurai/timers.hpp>
+#include <samurai/field.hpp>
+#include <samurai/algorithm/update.hpp>
+
+#include <samurai/load_balancing.hpp>
+#include <samurai/load_balancing_sfc.hpp>
+#include <samurai/load_balancing_diffusion_interval.hpp>
+#include <samurai/load_balancing_diffusion_cell.hpp>
+#include <samurai/load_balancing_void.hpp>
+
+#include <samurai/mr/mesh.hpp>
+#include <samurai/box.hpp>
+#include <samurai/cell_array.hpp>
+#include <samurai/hdf5.hpp>
+#include <samurai/mr/adapt.hpp>
+#include <samurai/assertLogTrace.hpp>
+#include <samurai/schemes/fv.hpp>
+
+#include <cassert>
+#include <algorithm>
+
+#include <vector>
+
+template <class Mesh>
+auto getLevel(Mesh& mesh)
+{
+    auto level = samurai::make_field<int, 1>("level", mesh);
+
+    samurai::for_each_cell( mesh, [&]( auto & cell ) {
+        level[ cell ] = static_cast<int>( cell.level );
+    });
+
+    return level;
+}
+
+template <int dim, class Mesh>
+auto init(Mesh& mesh, const double radius, const double x_center, const double y_center )
+{
+    auto u = samurai::make_field<double, 1>("u", mesh);
+
+    samurai::for_each_cell( mesh, [&](auto& cell) {
+        auto center = cell.center();
+        double rad  = 0.;
+        double z_center = 0.5;
+        
+        if constexpr ( dim == 3 ){
+            rad = ((center[0] - x_center) * (center[0] - x_center) + 
+                   (center[1] - y_center) * (center[1] - y_center) +
+                   (center[2] - z_center) * (center[2] - z_center) );
+        }else{
+            rad = ((center[0] - x_center) * (center[0] - x_center) + 
+                   (center[1] - y_center) * (center[1] - y_center) );
+        }
+        if ( rad <= radius * radius) {
+            u[cell] = 1;
+        } else {
+            u[cell] = 0;
+        }
+    });
+
+    return u;
+}
+
+template <int dim, class Mesh, class Coord_t>
+auto initMultiCircles(Mesh& mesh, const std::vector<double> &radius, const std::vector<Coord_t> & centers )
+{
+    auto u = samurai::make_field<double, 1>("u", mesh);
+
+    samurai::for_each_cell( mesh, [&](auto& cell) {
+        u[ cell ] = 0.;
+    });
+
+    samurai::for_each_cell( mesh, [&](auto& cell) {
+        auto cc = cell.center();
+
+        for(size_t icerc=0; icerc<centers.size(); ++icerc ){
+            double rad  = 0.;
+            
+            for( size_t idim=0; idim<dim; ++idim ){
+                rad += (cc[ idim ] - centers[ icerc ][ idim ]) * (cc[ idim ] - centers[ icerc ][ idim ]);
+            }
+
+            if ( rad <= radius[ icerc ] * radius[ icerc ]) {
+                u[ cell ] += 1.;
+            } else {
+                u[ cell ] += 0.;
+            }
+        }
+
+    });
+
+    return u;
+}
+
+template<class Mesh_t, class Field_t, class Conv_t>
+void upWind(int niter_benchmark, const Mesh_t & mesh, Field_t & u2, Field_t & unp1, double dt, Conv_t & conv) {
+
+    for(int i=0; i<niter_benchmark; ++i){
+        auto c = conv(u2);
+        samurai::for_each_interval( mesh, [&]( [[maybe_unused]] std::size_t level, const auto& i, [[maybe_unused]] const auto & index ){
+            for(int64_t ii=i.index+i.start; ii<i.index+i.end; ++ii){
+                unp1[ ii ] = u2[ ii ] - dt * c[ii];
+            }
+        }
+        );
+        // unp1 = u2 - dt * 2; // conv( u2 );
+    }
+}
+
+template<int dim>
+struct Config_test_load_balancing {
+    int niter_benchmark   = 1;
+    int niter_loadbalance = 1; // number of iteration of load balancing
+    int ndomains          = 1; // number of partition (mpi processes)
+    int rank              = 0; // current mpi rank
+    std::size_t minlevel  = 6;
+    std::size_t maxlevel  = 8;
+    double dt             = 0.1;
+    double mr_regularity  = 1.0;
+    double mr_epsilon     = 2.e-4;
+    xt::xtensor_fixed<double, xt::xshape<dim>> minCorner = {0., 0., 0.};
+    xt::xtensor_fixed<double, xt::xshape<dim>> maxCorner = {1., 1., 1.};
+    std::vector<double> bubles_r;                 // bubles radius
+    std::vector<std::array<double, 3>> bubles_c;  // bubles centers
+};
+
+template<int dim, class Load_Balancer_t, typename Vel_t>
+Timers benchmark_loadbalancing( struct Config_test_load_balancing<dim> & conf, Load_Balancer_t && lb, Vel_t & velocity ){
+    
+    using Config    = samurai::MRConfig<dim>;
+    using Mesh_t    = samurai::MRMesh<Config>;
+    using mesh_id_t = typename Mesh_t::mesh_id_t;
+
+    Timers myTimers;
+
+    /**
+     * Initialize AMR mesh with spherical buble 
+    */
+
+    samurai::Box<double, dim> box( conf.minCorner, conf.maxCorner );
+
+    Mesh_t mesh( box, conf.minlevel, conf.maxlevel );
+
+    // auto u = init<dim>( mesh, radius, x_center, y_center );
+    myTimers.start("InitMesh");
+    auto u = initMultiCircles<dim>( mesh, conf.bubles_r, conf.bubles_c );
+    myTimers.stop("InitMesh");
+
+    auto lvl = getLevel( mesh );
+
+    samurai::make_bc<samurai::Dirichlet<1>>( u, 0. );
+
+    myTimers.start("mradapt");
+    auto mradapt = samurai::make_MRAdapt( u );
+    mradapt( conf.mr_epsilon, conf.mr_regularity );
+    myTimers.stop("mradapt");
+
+    // samurai::save( "init_circle_"+std::to_string( ndomains)+"_domains", mesh, lvl );
+
+    myTimers.start( lb.getName() );
+    for(int lb_iter=0; lb_iter<conf.niter_loadbalance; ++lb_iter ){
+        lb.load_balance( mesh );
+        auto s_ = fmt::format( "{}_{}_iter_{}", lb.getName(), conf.ndomains, lb_iter);
+        samurai::save( s_ , mesh );
+    }
+    myTimers.stop( lb.getName() );
+
+    std::string _stats = fmt::format( "stats_{}_process_{}", lb.getName(), conf.rank );
+    samurai::Statistics s ( _stats );
+    s( "stats", mesh );
+
+    {
+        Mesh_t newmesh( mesh[mesh_id_t::cells], mesh );
+        // auto u2 = init<dim>( newmesh, radius, x_center, y_center );
+        auto u2 = initMultiCircles<dim>( newmesh, conf.bubles_r, conf.bubles_c );
+        samurai::save( "init2_circle_"+std::to_string( conf.ndomains )+"_domains", newmesh, u2 );
+
+        auto conv = samurai::make_convection_upwind<decltype(u2)>( velocity );
+        auto unp1 = samurai::make_field<double, 1>("unp1", newmesh);
+
+        // myTimers.start( "update_ghost_mr_bf" );
+        // for(int i=0; i<conf.niter_benchmark; ++i){
+        //     samurai::update_ghost_mr( u2 );
+        // }           
+        // myTimers.stop( "update_ghost_mr_bf" );
+
+        myTimers.start( "upwind" );
+        for(int i=0; i<conf.niter_benchmark; ++i){
+            unp1 = u2 - conf.dt *  conv( u2 );
+        }
+        // upWind( conf.niter_benchmark, mesh, u2, unp1, conf.dt, conv );
+        myTimers.stop( "upwind" );
+
+        myTimers.start( "update_ghost_mr" );
+        for(int i=0; i<conf.niter_benchmark; ++i){
+            samurai::update_ghost_mr( u2 );
+        }           
+        myTimers.stop( "update_ghost_mr" );
+        
+        // auto mradapt = samurai::make_MRAdapt( u2 );
+
+        // myTimers.start( "mradapt" );
+        // mradapt( conf.mr_epsilon, conf.mr_regularity );
+        // myTimers.stop( "mradapt" );
+
+    }
+
+    return myTimers;
+}
+
+int main( int argc, char * argv[] ){
+
+    samurai::initialize(argc, argv);
+
+    constexpr int    dim = 2;
+    constexpr double cfl = 0.5;
+
+    struct Config_test_load_balancing<dim> conf;
+
+    std::string filename = "ld_mesh_init";
+
+    double radius = 0.2, x_center = 0.5, y_center = 0.5;
+    bool multi = false;
+
+    CLI::App app{"Load balancing test"};
+    app.add_option("--iter-loadbalance", conf.niter_loadbalance, "Number of desired lb iteration")
+                   ->capture_default_str()->group("Simulation parameters");
+    app.add_option("--iter-bench", conf.niter_benchmark, "Number of iteration for bench")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--min-level", conf.minlevel, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--max-level", conf.maxlevel, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--mr-eps", conf.mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh")
+        ->capture_default_str()->group("Multiresolution");
+    app.add_option("--mr-reg", conf.mr_regularity,"The regularity criteria used by the multiresolution to "
+                   "adapt the mesh")->capture_default_str()->group("Multiresolution");
+    app.add_option("--filename", filename, "File name prefix")->capture_default_str()->group("Ouput");
+    app.add_option("--radius", radius, "Bubble radius")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--xcenter", x_center, "Bubble x-axis center")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--ycenter", y_center, "Bubble y-axis center")->capture_default_str()->group("Simulation parameters");
+    app.add_flag("--multi", multi, "Multiple bubles")->group("Simulation parameters");
+    
+    CLI11_PARSE(app, argc, argv);
+
+    conf.dt = cfl / ( 1 << conf.maxlevel );
+
+    // Convection operator
+    samurai::VelocityVector<dim> velocity;
+    velocity.fill( 1 );
+    velocity( 1 ) = -1;
+
+    boost::mpi::communicator world;
+    conf.rank     = world.rank();
+    conf.ndomains = static_cast<int>( world.size() );
+
+    if( multi ) {
+        conf.bubles_r = { 0.2, 0.1, 0.05};
+        conf.bubles_c = {{ 0.2, 0.2, 0.2 }, { 0.5, 0.5, 0.5 }, { 0.8, 0.8, 0.8 }};
+    }else{
+        conf.bubles_r = { radius };
+        conf.bubles_c = {{ x_center, y_center, 0.5 }};
+    }
+
+    if( conf.rank == 0 ) std::cerr << "\n\t> Testing 'Interval propagation load balancer' " << std::endl;
+
+    if( conf.ndomains > 1 ) { // load balancing cells by cells using interface propagation
+        auto times = benchmark_loadbalancing( conf, Diffusion_LoadBalancer_interval<dim> (), velocity );
+        times.print();
+        world.barrier();
+    }
+
+    if( conf.rank == 0 ) std::cerr << "\n\t> Testing 'Gravity-based cell exchange' load balancer' " << std::endl;
+
+    if( conf.ndomains > 1 ) { // load balancing cells by cells using gravity
+        auto times = benchmark_loadbalancing( conf, Diffusion_LoadBalancer_cell<dim> (), velocity );
+        times.print();
+        world.barrier();
+    }
+
+    if( conf.rank == 0 ) std::cerr << "\n\t> Testing Morton_LoadBalancer_interval " << std::endl;
+
+    if( conf.ndomains > 1 ) { // load balancing using SFC morton
+        auto times = benchmark_loadbalancing( conf, SFC_LoadBalancer_interval<dim, Morton> (), velocity );
+        times.print();
+        world.barrier();
+    }
+
+    if( conf.rank == 0 ) std::cerr << "\n\t> Testing Hilbert_LoadBalancer_interval " << std::endl;
+
+    if( conf.ndomains > 1 ) { // load balancing using SFC hilbert
+        auto times = benchmark_loadbalancing( conf, SFC_LoadBalancer_interval<dim, Hilbert> (), velocity );
+        times.print();
+        world.barrier();
+    }
+
+     if( conf.rank == 0 ) std::cerr << "\n\t> Testing no loadbalancing (initial partitionning) " << std::endl;
+
+    { 
+        auto times = benchmark_loadbalancing( conf, Void_LoadBalancer<dim> (), velocity );
+        times.print();
+        world.barrier();
+    }
+
+    samurai::finalize();
+
+    return 0;
+}
\ No newline at end of file
diff --git a/demos/loadbalancing/partition.cpp b/demos/loadbalancing/partition.cpp
new file mode 100644
index 000000000..128c0605f
--- /dev/null
+++ b/demos/loadbalancing/partition.cpp
@@ -0,0 +1,318 @@
+#include <iostream>
+
+// CLI
+#include <CLI/CLI.hpp>
+
+// samurai 
+#include <samurai/samurai.hpp>
+#include <samurai/timers.hpp>
+#include <samurai/field.hpp>
+
+#include <samurai/load_balancing.hpp>
+
+#include <samurai/mr/mesh.hpp>
+#include <samurai/box.hpp>
+#include <samurai/cell_array.hpp>
+#include <samurai/hdf5.hpp>
+#include <samurai/mr/adapt.hpp>
+#include <samurai/assertLogTrace.hpp>
+
+#include <cassert>
+#include <algorithm>
+
+template <class Mesh>
+auto init(Mesh& mesh, const double radius, const double x_center, const double y_center )
+{
+    auto u = samurai::make_field<double, 1>("u", mesh);
+
+    samurai::for_each_cell( mesh, [&](auto& cell) {
+        auto center = cell.center();
+        double rad  = ((center[0] - x_center) * (center[0] - x_center) + 
+                       (center[1] - y_center) * (center[1] - y_center) );
+        if ( rad <= radius * radius) {
+            u[cell] = 1;
+        } else {
+            u[cell] = 0;
+        }
+    });
+
+    return u;
+}
+
+template<int dim, class CellArray_t>
+void detectInterface( std::vector<CellArray_t> & meshes, std::vector<samurai::MPI_Load_Balance> & graph ){
+
+    std::cerr << "\t> [detectInterface] Number of mesh : " << meshes.size() << std::endl;
+    
+    for(int irank=0; irank<meshes.size(); ++irank ){
+
+        std::cerr << "\t> Processing domain # " << irank << std::endl;
+
+        for( int nbi=0; nbi<graph[ irank ].neighbour.size(); ++nbi ){
+
+            auto neighbour_rank = graph[ irank ].neighbour[ nbi ];
+            std::cerr << "\t\t> Neighbour process # " << neighbour_rank << std::endl;
+
+            for( int idim=0; idim<2*dim; ++idim ){
+                
+                xt::xtensor_fixed<int, xt::xshape<dim>> stencil;
+                stencil.fill( 0 );
+
+                stencil[ idim / 2 ] = 1 - ( idim % 2 ) * 2;
+
+                std::cerr << "\t\t\t> Stencil {" << stencil[ 0 ] << ", " << stencil[ 1 ] << "}" << std::endl;
+
+                size_t nbCellsIntersection = 0, nbIntervalIntersection = 0;
+                int minlevel = meshes[ neighbour_rank ].min_level();
+                int maxlevel = meshes[ neighbour_rank ].max_level();
+
+                for( int level=minlevel; level<=maxlevel; ++level ) {
+
+                    // for each level look at level -1 / 0 / +1
+                    int minlevel_proj = meshes[ irank ].min_level();
+                    int maxlevel_proj = meshes[ irank ].max_level();
+                    for(size_t projlevel=std::max(minlevel_proj, level-1); projlevel<=std::min(maxlevel_proj, level+1); ++projlevel ){
+                        std::cerr << "\t\t\t> Looking intersection " << level << " onto level " << projlevel << std::endl;
+
+                        // intersection avec myself
+                        // translate neighbour from dir (hopefully to current)
+                        auto set = translate( meshes[ neighbour_rank ][ level ], stencil );
+                        auto intersect = intersection( set, meshes[ irank ][ projlevel ] ).on( projlevel );
+
+                        size_t nbInter_ = 0, nbCells_ = 0;
+                        intersect([&]( auto & i, auto & index ) {
+                            nbInter_ += 1;
+                            nbCells_ += i.size();
+                        });
+
+                        // we get more interval / cells, than wanted because neighbour has bigger cells
+                        if( nbInter_ > 0 && projlevel > level ){
+                            std::cerr << "\t\t\t> Too much cell selected  ... " << std::endl;
+                            auto set_  = translate( meshes[ irank ][ projlevel ], stencil );
+                            auto diff_ = difference( intersect, set_ );
+                            
+                            nbInter_ = 0;
+                            nbCells_ = 0;
+                            diff_([&]( auto & i, auto & index ) {
+                                nbInter_ += 1;
+                                nbCells_ += i.size();
+                            });
+
+                        }
+
+                        std::cerr << "\t\t\t> nbInter_ : " << nbInter_ << ", nbCells_ " << nbCells_ << std::endl;
+
+                        nbCellsIntersection    = nbCells_ ;
+                        nbIntervalIntersection = nbInter_ ;
+
+                    }
+
+                }
+                std::cerr << "\t\t> Total Ncells intersected " << nbCellsIntersection << ", nb interval : " << nbIntervalIntersection << std::endl;
+
+            }
+
+        }
+    
+    }
+
+}
+
+int main( int argc, char * argv[] ){
+
+    samurai::initialize(argc, argv);
+    Timers myTimers;
+
+    constexpr int dim = 2;
+    int ndomains = 1;
+    std::size_t minLevel = 3, maxLevel = 8;
+    double mr_regularity = 1.0, mr_epsilon = 2.e-4;
+    xt::xtensor_fixed<double, xt::xshape<dim>> minCorner = {0., 0.};
+    xt::xtensor_fixed<double, xt::xshape<dim>> maxCorner = {1., 1.};
+    std::string filename = "ld_mesh_init";
+
+    double radius = 0.2, x_center = 0.5, y_center = 0.5;
+
+    CLI::App app{"Load balancing test"};
+    app.add_option("--ndomains", ndomains, "Number of desired domains")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--min-corner", minCorner, "The min corner of the box")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--max-corner", maxCorner, "The max corner of the box")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--min-level", minLevel, "Minimum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--max-level", maxLevel, "Maximum level of the multiresolution")->capture_default_str()->group("Multiresolution");
+    app.add_option("--mr-eps", mr_epsilon, "The epsilon used by the multiresolution to adapt the mesh")
+        ->capture_default_str()->group("Multiresolution");
+    app.add_option("--mr-reg", mr_regularity,"The regularity criteria used by the multiresolution to "
+                   "adapt the mesh")->capture_default_str()->group("Multiresolution");
+    app.add_option("--filename", filename, "File name prefix")->capture_default_str()->group("Ouput");
+    app.add_option("--radius", radius, "Bubble radius")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--xcenter", x_center, "Bubble x-axis center")->capture_default_str()->group("Simulation parameters");
+    app.add_option("--ycenter", y_center, "Bubble y-axis center")->capture_default_str()->group("Simulation parameters");
+    
+    CLI11_PARSE(app, argc, argv);
+
+    using Config = samurai::MRConfig<dim>;
+    using Mesh_t = samurai::MRMesh<Config>;
+    using CellList_t = typename Mesh_t::cl_type;
+    using CellArray_t = typename Mesh_t::ca_type;
+
+    /**
+     * Initialize AMR mesh with spherical buble 
+    */
+    myTimers.start("InitMesh");
+    // samurai::Box<double, dim> box( minCorner, maxCorner );
+
+    // Mesh_t mesh(box, minLevel, maxLevel);
+
+    // auto u = init( mesh, radius, x_center, y_center );
+    // samurai::make_bc<samurai::Dirichlet>(u, 0.);
+    // auto mradapt = samurai::make_MRAdapt( u );
+    // mradapt( mr_epsilon, mr_regularity );
+
+    std::vector<CellList_t> clists( 3 );
+    std::vector<CellArray_t> carrays( 3 );
+
+    clists[0][1][{0}].add_interval({1, 4});
+    clists[0][1][{1}].add_interval({1, 4});
+    clists[0][1][{2}].add_interval({0, 4});
+    clists[0][1][{3}].add_interval({0, 4});
+    clists[0][2][{0}].add_interval({0, 2});
+    clists[0][2][{1}].add_interval({0, 2});
+    clists[0][2][{2}].add_interval({0, 2});
+    clists[0][2][{3}].add_interval({0, 2});
+
+    // subdomain 2
+    clists[1][0][{2}].add_interval({0, 4});
+    clists[1][0][{3}].add_interval({0, 4});
+
+    // subdomain 3
+    clists[2][0][{0}].add_interval({2, 4});
+    clists[2][0][{1}].add_interval({2, 4});
+
+    carrays[0] = { clists[0], true };
+    carrays[1] = { clists[1], true };
+    carrays[2] = { clists[2], true };
+
+    myTimers.stop("InitMesh");
+
+    std::vector<samurai::MPI_Load_Balance> graph( 3 );
+    { // fill graph info
+        
+        // find a way to compute this 
+        graph[ 0 ].neighbour = {1, 2};
+        graph[ 1 ].neighbour = {0, 2};
+        graph[ 2 ].neighbour = {0, 1};
+
+        // fill "current" load  ----> no MPI comm, local info
+        for(size_t i=0; i<3; ++i ){
+            graph[ i ]._load = carrays[ i ].nb_cells();
+        }
+
+        // fill neighbour load -----> this will imply MPI comm
+        for(size_t i=0; i<3; ++i ){      
+            graph[ i ].load.resize( graph[ i ].neighbour.size() );
+            for(size_t ni=0; ni<graph[ i ].neighbour.size(); ++ni ){
+                auto neighbour_rank = graph[ i ].neighbour[ ni ];
+                graph[ i ].load[ ni ] = graph[ neighbour_rank ]._load ;
+            }
+        }
+    }
+
+    CellList_t global;
+    for( int level=0; level<3; ++level){
+        auto un = samurai::union_( carrays[0][level], carrays[1][level], carrays[2][level] );
+
+        un([&]( auto & i, auto & index ) {
+            global[ level ][ index ].add_interval( i );
+        });
+    }
+
+
+    detectInterface<dim>( carrays, graph );
+
+
+    CellArray_t mesh = { global, true };
+
+    auto init_rank = samurai::make_field<int, 1>( "init_rank", mesh);
+
+    { // initialize fake_rank
+        
+        for(size_t m_=0; m_<carrays.size(); ++m_ ){
+
+            for( int level=std::min( mesh.min_level(), carrays[ m_ ].min_level()); level<4; ++level ) {
+                auto intersect = intersection( mesh[ level ], carrays[ m_ ][ level ]);
+
+                intersect([&]( auto & i, auto & index ) {
+                    init_rank( level, i, index ) = static_cast<int>( m_ );
+                });
+            }
+        }
+
+    }
+
+    // output mesh 
+    SAMURAI_LOG( "\t> Initial configurtion : " + filename );
+    samurai::save( filename, mesh, init_rank );
+
+    myTimers.start( "load_balance_fluxes" );
+    compute_load_balancing_fluxes( graph );
+    myTimers.start( "load_balance_fluxes" );
+
+    // explicitly call load_balancing();
+
+    // std::cerr << "\n\n" << std::endl;
+    // SAMURAI_LOG( "[partition] Explicitly calling load_balancing() ... ");
+
+    // for(int il=0; il<3; ++il ){
+    //     myTimers.start("load-balancing");
+    //     mesh.force_load_balancing();
+    //     myTimers.stop("load-balancing");
+
+    //     // output mesh 
+    //     std::cerr << "\t> Dumping mesh to file : " << "load-balanced" << std::endl;
+    //     samurai::save( "load-balanced_"+std::to_string(il), mesh, u );
+    // }
+
+    auto sfc_morton_rank = samurai::make_field<int, 1>( "sfc_morton_rank", mesh);
+
+    myTimers.start( "balance_morton" );
+    // perform_load_balancing_SFC<dim>( mesh, ndomains, sfc_morton_rank );
+    myTimers.stop( "balance_morton" );
+
+
+    auto diffusion_rank = samurai::make_field<int, 1>( "diffusion_rank", mesh);
+
+    { // initialize fake_rank
+        
+        for(size_t m_=0; m_<carrays.size(); ++m_ ){
+
+            for( int level=std::min( mesh.min_level(), carrays[ m_ ].min_level()); level<4; ++level ) {
+                auto intersect = intersection( mesh[ level ], carrays[ m_ ][ level ]);
+
+                intersect([&]( auto & i, auto & index ) {
+                    diffusion_rank( level, i, index ) = static_cast<int>( m_ );
+                });
+            }
+        }
+
+    }
+
+    samurai::save( "load_balanced_diff_init_"+std::to_string( ndomains), mesh, sfc_morton_rank, diffusion_rank );
+
+    for_each_cell( mesh, [&](const auto & cell ){
+        diffusion_rank[ cell ] = -1;
+    });
+
+    myTimers.start( "balance_diffusion" );
+    perform_load_balancing_diffusion<dim>( mesh, carrays, ndomains, graph, diffusion_rank );
+    myTimers.stop( "balance_diffusion" );
+
+    filename = "load_balanced_diff_" + std::to_string( ndomains);
+    SAMURAI_LOG( "\t> Generated file : " + filename );
+    samurai::save( filename, mesh, sfc_morton_rank, diffusion_rank );
+    
+
+    myTimers.print();
+    samurai::finalize();
+
+    return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/include/samurai/assertLogTrace.hpp b/include/samurai/assertLogTrace.hpp
new file mode 100644
index 000000000..a2f771c20
--- /dev/null
+++ b/include/samurai/assertLogTrace.hpp
@@ -0,0 +1,22 @@
+#pragma once 
+
+#include <iostream>
+
+#define SAMURAI_ASSERT(condition, msg) \
+do { \
+    if (! (condition)) { \
+        std::cerr << "Assertion failed \nin " << __FILE__ \
+                    << "\n @line " << __LINE__ << ": " << msg << std::endl; \
+        std::terminate(); \
+    } \
+} while (false)
+
+//#ifdef NDEBUG
+#define SAMURAI_LOG( msg ) do { std::cerr <<  "SMR::Log:: " \
+                                          <<  msg << std::endl; } while (0)
+
+#define SAMURAI_TRACE( msg ) do { std::cerr <<  "SMR::Trace[line " << __LINE__ << "] :" \
+                                          <<  msg << std::endl; } while (0)
+//#else
+//#define MGS_LOG( msg )
+//#endif
diff --git a/include/samurai/boundary.hpp b/include/samurai/boundary.hpp
index 3e0fcdcb7..48531d56b 100644
--- a/include/samurai/boundary.hpp
+++ b/include/samurai/boundary.hpp
@@ -10,6 +10,9 @@ namespace samurai
         using mesh_id_t = typename Mesh::mesh_id_t;
 
         auto& cells = mesh[mesh_id_t::cells][level];
+        // auto& domain = mesh.domain();
+	// REBASE FIXME : I don't know if we need to override domain 
+        //auto& domain = mesh.subdomain();
 
         auto max_level    = domain.level(); // domain.level();//mesh[mesh_id_t::cells].max_level();
         auto one_interval = layer_width << (max_level - level);
diff --git a/include/samurai/cell_array.hpp b/include/samurai/cell_array.hpp
index ce7d625c7..f44e5886c 100644
--- a/include/samurai/cell_array.hpp
+++ b/include/samurai/cell_array.hpp
@@ -96,10 +96,8 @@ namespace samurai
 
         template <typename... T>
         const interval_t& get_interval(std::size_t level, const interval_t& interval, T... index) const;
-
         template <class E>
         const interval_t& get_interval(std::size_t level, const interval_t& interval, const xt::xexpression<E>& index) const;
-
         template <class E>
         const interval_t& get_interval(std::size_t level, const xt::xexpression<E>& coord) const;
 
@@ -120,6 +118,9 @@ namespace samurai
         std::size_t nb_cells() const;
         std::size_t nb_cells(std::size_t level) const;
 
+        std::size_t nb_intervals(std::size_t dim) const;
+        std::size_t nb_intervals(std::size_t dim, std::size_t level) const;
+
         std::size_t max_level() const;
         std::size_t min_level() const;
 
@@ -384,6 +385,38 @@ namespace samurai
         return m_cells[level].nb_cells();
     }
 
+    /**
+     * Return the number of intervals for a given dimension over the levels.
+     *
+     * @param d The dimension where to compute the number of intervals
+     * @return The number of intervals for the given dimension
+     */
+    template <std::size_t dim_, class TInterval, std::size_t max_size_>
+    inline std::size_t CellArray<dim_, TInterval, max_size_>::nb_intervals(std::size_t d) const
+    {
+        assert(d < dim);
+        std::size_t size = 0;
+        for (std::size_t level = 0; level <= max_size; ++level)
+        {
+            size += m_cells[level].nb_intervals(d);
+        }
+        return size;
+    }
+
+    /**
+     * Return the number of intervals for a given dimension and a given level.
+     *
+     * @param d The dimension where to compute the number of intervals
+     * @param level The level where to compute the number of intervals
+     * @return The number of intervals for the given dimension
+     */
+    template <std::size_t dim_, class TInterval, std::size_t max_size_>
+    inline std::size_t CellArray<dim_, TInterval, max_size_>::nb_intervals(std::size_t d, std::size_t level) const
+    {
+        assert(d < dim);
+        return m_cells[level].nb_intervals(d);
+    }
+
     /**
      * Return the maximum level where the array entry is not empty.
      */
diff --git a/include/samurai/cell_list.hpp b/include/samurai/cell_list.hpp
index 24a5da7ef..7cb128d54 100644
--- a/include/samurai/cell_list.hpp
+++ b/include/samurai/cell_list.hpp
@@ -34,6 +34,17 @@ namespace samurai
         const lcl_type& operator[](std::size_t i) const;
         lcl_type& operator[](std::size_t i);
 
+        inline bool empty() const { 
+            
+            bool rst = true;
+            
+            for(const auto & lcl : m_cells ){ 
+                rst &= lcl.empty();
+            }
+
+            return rst;
+        }
+
         void to_stream(std::ostream& os) const;
 
         auto& origin_point() const;
diff --git a/include/samurai/hdf5.hpp b/include/samurai/hdf5.hpp
index 94bbfc8ad..9d4a3b19b 100644
--- a/include/samurai/hdf5.hpp
+++ b/include/samurai/hdf5.hpp
@@ -93,7 +93,7 @@ namespace samurai
         {
             std::size_t index = 0;
             for_each_cell(submesh,
-                          [&](auto cell)
+                          [&](const auto & cell)
                           {
                               if constexpr (Field::size == 1)
                               {
@@ -136,7 +136,7 @@ namespace samurai
         std::size_t id    = 0;
         std::size_t index = 0;
         for_each_cell(mesh,
-                      [&](auto cell)
+                      [&](const auto & cell)
                       {
                           std::array<double, dim> a;
                           auto start_corner = cell.corner();
diff --git a/include/samurai/hilbert.hpp b/include/samurai/hilbert.hpp
new file mode 100644
index 000000000..0ca52adca
--- /dev/null
+++ b/include/samurai/hilbert.hpp
@@ -0,0 +1,425 @@
+#pragma once
+
+#include <vector>
+#include "sfc.hpp"
+
+class Hilbert : public SFC<Hilbert> {
+
+    private:
+        // const int _nbits = sizeof(int) * 8;
+
+        // State based algorithm
+        
+        // const std::vector<std::vector<std::vector<int>>> _2D_STATE = {{{1, 0, 2, 3}, {0, 0, 2, 2}},
+        //                                                               {{0, 3, 2, 1}, {1, 3, 1, 3}},
+        //                                                               {{2, 1, 0, 3}, {3, 1, 3, 1}},
+        //                                                               {{0, 1, 3, 2}, {2, 2, 0, 0}}};
+
+        // const std::vector<std::vector<std::vector<int>>> _3D_STATE = {{{1,2,0,6,11,4,5,6,10,4,7,10},
+        //                                                                {0,0,0,2,4,6,4,6,2,2,4,6}},
+        //                                                               {{2,6,9,0,11,4,7,1,3,4,2,3},
+        //                                                                {1,7,3,3,3,5,7,7,5,1,5,1}},
+        //                                                               {{3,0,10,6,0,8,5,6,1,8,11,2},
+        //                                                                {3,1,7,1,5,1,3,5,3,5,7,7}},
+        //                                                               {{2,7,9,11,7,8,3,10,1,8,2,6},
+        //                                                                {2,6,4,0,2,2,0,4,4,6,6,0}},
+        //                                                               {{4,8,1,9,5,0,1,9,10,2,7,10},
+        //                                                                {7,3,1,5,7,7,5,1,1,3,3,5}},
+        //                                                               {{5,8,1,0,9,6,1,4,3,7,5,3},
+        //                                                                {6,4,2,4,0,4,6,0,6,0,2,2}},
+        //                                                               {{3,0,11,9,0,10,11,9,5,2,8,4},
+        //                                                                {4,2,6,6,6,0,2,2,0,4,0,4}},
+        //                                                               {{5,7,11,8,7,6,11,10,9,3,5,4},
+        //                                                                {5,5,5,7,1,3,1,3,7,7,1,3}}};
+
+        // const std::vector<std::vector<std::vector<int>>> _3D_STATE_r = {{{1,2,0,11,9,10,3,4,5,7,8,6},
+        //                                                                  {0,0,0,3,5,6,3,5,6,5,6,3}},
+        //                                                                 {{2,0,1,6,7,8,11,9,10,4,5,3},
+        //                                                                  {1,2,4,2,7,2,7,4,4,1,7,1}},
+        //                                                                 {{2,0,1,6,7,8,11,9,10,4,5,3},
+        //                                                                  {3,6,5,0,3,3,6,6,0,0,5,5}},
+        //                                                                 {{3,8,9,0,11,6,5,10,1,2,7,4},
+        //                                                                  {2,4,1,1,1,7,2,7,2,4,4,7}},
+        //                                                                 {{3,8,9,0,11,6,5,10,1,2,7,4},
+        //                                                                  {6,5,3,5,0,5,0,3,3,6,0,6}},
+        //                                                                 {{5,7,11,9,0,4,1,6,3,8,2,10},
+        //                                                                  {7,7,7,4,2,1,4,2,1,2,1,4}},
+        //                                                                 {{5,7,11,9,0,4,1,6,3,8,2,10},
+        //                                                                  {5,3,6,6,6,0,5,0,5,3,3,0}},
+        //                                                                 {{4,6,10,8,5,0,7,1,9,3,11,2},
+        //                                                                  {4,1,2,7,4,4,1,1,7,7,2,2}}};
+
+
+        // def __init__(self, indexbits=16, dim=2):
+        // """
+        //     initialize hilbert class object
+        // """
+        // assert indexbits > 0
+        // assert 2 <= dim <= 3, "Dimension expected to be 2 or 3"
+
+        // self._max_hindex = (2 ** indexbits) - 1
+        // self._nbits = indexbits
+        // self._dimension = dim
+
+        // # print("\t> Max H-index: 2^{}-1 = {} !".format(indexbits, self._max_hindex))
+
+        template<int dim, class Coord_t>
+        Coord_t AxestoTranspose( const Coord_t & c ) const {
+
+            constexpr int _nbits = sizeof( uint32_t ) * 8;
+
+            Coord_t _transposed;
+            for(size_t idim=0; idim<dim; ++idim )
+                _transposed( idim ) = c( idim );
+
+            uint32_t m = 1U << ( _nbits - 1 );
+
+            uint32_t q = m;
+            while( q > 1 ){
+                uint32_t p = q - 1;
+                for( size_t j = 0; j<dim; ++j ){
+                    if( _transposed( j ) & q ){
+                        _transposed( 0 ) ^= p;
+                    }else{
+                        uint32_t t = ( _transposed( 0 ) ^ _transposed( j ) ) & p;
+                        _transposed( 0 ) ^= t;
+                        _transposed( j ) ^= t;
+                    }
+                }
+                q >>= 1;
+            }
+
+            // Gray code
+            for( int i=1; i<dim; ++i){
+                _transposed( i ) ^= _transposed( i - 1 );
+            }
+
+            uint32_t t = 0;
+            q = m;
+            while( q > 1 ){
+                if( _transposed( dim - 1 ) & q ){
+                    t ^= ( q - 1 );
+                }
+                q >>= 1;
+            }
+
+            for(int i=0; i<dim; ++i){ 
+                _transposed( i ) ^= t;
+            }
+
+            return _transposed;
+
+        }
+
+    // def TransposeToAxes(self, x):
+    //     """
+    //         From axes to Hilbert
+    //     :param x: position
+    //     :return:
+    //     """
+    //     m = 2 << (self._nbits-1)
+
+    //     # gray decode
+    //     t = x[self._dimension-1] >> 1
+    //     for i in range(self._dimension-1, 0, -1):
+    //         x[i] ^= x[i-1]
+    //     x[0] ^= t
+
+    //     q = 2
+    //     while q != m:
+    //         p = q - 1
+    //         for i in range(self._dimension-1, -1, -1):
+    //             if x[i] & q:
+    //                 x[0] ^= p
+    //             else:
+    //                 t = (x[0] ^ x[i]) & p
+    //                 x[0] ^= t
+    //                 x[i] ^= t
+    //         q <<= 1
+
+    public:
+
+        inline std::string getName() const { return "Hilbert"; }
+
+        template<class Coord_t>
+        inline SFC_key_t getKey_2D_impl( const Coord_t & lc ) const {
+            // SAMURAI_TRACE( "Hilbert::getKey_2D_impl Entering" );
+
+            // Be sure it is positive, shift must be done before
+            assert( lc( 0 ) >= 0 );
+            assert( lc( 1 ) >= 0 );
+
+            constexpr int dim = 2;
+            constexpr int _nbits = sizeof( lc( 0 ) ) * 8;
+
+            // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1)  << std::endl;
+            xt::xtensor_fixed<uint32_t, xt::xshape<dim>> coord = AxestoTranspose<dim>( lc );
+            // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1) << std::endl;
+
+            int nb = ( _nbits * dim ) - 1;
+            SFC_key_t tmp = 0;
+
+            for(int jj=_nbits-1; jj>= 0; --jj){
+                for( int ii=0; ii<dim; ++ii){
+
+                    uint32_t a =  coord( ii ) >> jj;
+                    // std::cerr << " jj : " << jj << ", a : " << a ;
+
+                    if( ( a & static_cast<uint32_t>( 1 ) ) == 1 ){
+                        SFC_key_t ad = ( static_cast<SFC_key_t>( 1 ) << nb );
+                        // std::cerr << " nb : " << nb;
+                        tmp += ad;
+                    }
+                    nb --;
+                }
+            }
+
+            return tmp;
+        }
+
+        template<class Coord_t>
+        inline SFC_key_t getKey_3D_impl( const Coord_t & lc ) const {
+            // SAMURAI_TRACE( "Hilbert::getKey_2D_impl Entering" );
+
+            // Be sure it is positive, shift must be done before
+            assert( lc( 0 ) >= 0 );
+            assert( lc( 1 ) >= 0 );
+            assert( lc( 2 ) >= 0 );
+
+            constexpr int dim = 3;
+            constexpr int _nbits = sizeof( lc( 0 ) ) * 8;
+
+            // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1)  << std::endl;
+            xt::xtensor_fixed<uint32_t, xt::xshape<dim>> coord = AxestoTranspose<dim>( lc );
+            // std::cerr << "\t\t> Coord (" << coord(0) << ", " << coord(1) << std::endl;
+
+            int nb = ( _nbits * dim ) - 1;
+            SFC_key_t tmp = 0;
+
+            for(int jj=_nbits-1; jj>= 0; --jj){
+                for( int ii=0; ii<dim; ++ii){
+
+                    uint32_t a =  coord( ii ) >> jj;
+                    // std::cerr << " jj : " << jj << ", a : " << a ;
+
+                    if( ( a & static_cast<uint32_t>( 1 ) ) == 1 ){
+                        SFC_key_t ad = ( static_cast<SFC_key_t>( 1 ) << nb );
+                        // std::cerr << " nb : " << nb;
+                        tmp += ad;
+                    }
+                    nb --;
+                }
+            }
+
+            return tmp;
+        }
+
+        /*
+        * Return the hilbert index from logical coordinate (i,j) or (i,j,k)
+        *
+        * Parameters:
+        *             lc    : structure containing logical coordinate
+        *             level : used for multiresolution
+        * Return :
+        *             key    : hilbert key
+        */
+        // inline SFC_key_t getKey_2D_impl_state( const Logical_coord & lc, int lvl ) const {
+        //     SAMURAI_TRACE( "Hilbert::getKey_2D_impl Entering " );
+
+        //     SFC_key_t la_clef = 0;
+            
+        //     constexpr int dim = 2;
+        //     constexpr int twotondim = 1 << dim;
+
+        //     std::vector<std::vector<int>> ind_bits( lvl, std::vector<int>( dim ) );
+        //     std::vector<int> h_digits( lvl, 0 );
+
+        //     SFC_key_t xyz[dim] = { lc.i, lc.j };
+
+        //     // Set ind_bits for current point
+        //     for (int ibit = 0; ibit < lvl; ++ibit) {
+        //         for( int idim=0; idim<dim; ++idim ) {
+
+        //             assert( ibit < ind_bits.size() );
+        //             ind_bits[ibit][idim] = (xyz[idim] >> ibit) & 1;
+        //         }
+        //     }
+        //     SAMURAI_TRACE( "Hilbert::getKey_2D_impl set ind_bits " );
+
+        //     // Compute Hilbert key bits
+        //     int cur_state = 0;
+        //     int new_state;
+        //     for(int ibit=lvl-1; ibit>-1; --ibit) {
+        //         SAMURAI_LOG( "ibit : " + std::to_string( ibit ) );
+
+        //         // Compute s_digit by interleaving bits
+        //         int s_digit = 0;
+        //         for(int idim=0; idim<dim; ++idim){
+        //             SAMURAI_LOG( "s_digit : " + std::to_string( s_digit ) );
+        //             s_digit += (ind_bits[ibit][idim]) << (dim - idim); // dim - 1 - idim
+        //         }
+                
+        //         // Compute the new state from the state diagram
+        //         SAMURAI_LOG( "s_digit out: " + std::to_string( s_digit ) );
+        //         SAMURAI_LOG( "cur_state : " + std::to_string( cur_state ) );
+
+        //         std::cerr << _2D_STATE.size() << std::endl;
+
+        //         // Compute the new state from the state diagram
+        //         assert( s_digit < 4 );
+        //         new_state = _2D_STATE[s_digit][0][cur_state];
+        //         h_digits[ibit] = _2D_STATE[s_digit][1][cur_state];
+
+
+        //         SAMURAI_LOG( "new_state : " + std::to_string( new_state ) );
+
+        //         cur_state = new_state;
+        //     }
+        //     SAMURAI_TRACE( "Hilbert::getKey_2D_impl computed hilbert bits " );
+
+        //     // Assemble the point's key
+        //     for(int ibit=0; ibit<lvl; ++ibit) {
+        //         la_clef += std::pow( twotondim, ibit ) * h_digits[ibit];
+        //     }
+        //     SAMURAI_TRACE( "Hilbert::getKey_2D_impl assemble hilbert key " );
+
+        //     return la_clef;
+        // }
+
+        // template<typename Coord_t>
+        // inline SFC_key_t getKey_3D_impl( const Coord_t & lc, int lvl ) const {
+        //     SFC_key_t la_clef = 0;
+
+        //     constexpr int dim = 3;
+
+        //     int twotondim = 1 << dim;
+        //     int ind_bits[lvl][dim];
+        //     int h_digits[lvl];
+
+        //     SFC_key_t xyz[ dim ] = { lc( 0 ), lc( 1 ), lc( 2 ) };
+
+        //     // Set ind_bits for current point
+        //     for (int ibit = 0; ibit < lvl; ++ibit) {
+        //         for(size_t idim=0; idim<dim; ++idim) {
+        //             ind_bits[ibit][idim] = ( xyz[idim] >> ibit ) & 1;
+        //         }
+        //     }
+
+        //     // Compute Hilbert key bits
+        //     int cur_state = 0;
+        //     int new_state;
+        //     for(int ibit=lvl-1; ibit>-1; --ibit) {
+
+        //         // Compute s_digit by interleaving bits
+        //         int s_digit = 0;
+        //         for(size_t idim=0; idim<dim; ++idim)
+        //             s_digit += (ind_bits[ibit][idim]) << ( dim - 1 - idim );
+
+        //         // Compute the new state from the state diagram
+        //         new_state = _3D_STATE[s_digit][0][cur_state];
+        //         h_digits[ibit] = _3D_STATE[s_digit][1][cur_state];
+
+        //         cur_state = new_state;
+        //     }
+
+        //     // Assemble the point's key
+        //     for(int ibit=0; ibit<lvl; ++ibit) {
+        //         la_clef += static_cast<SFC_key_t>( std::pow(twotondim, ibit) * h_digits[ibit] );
+        //     }
+
+        //     return la_clef;
+        // }
+
+
+};
+
+// /*
+//  * Return the logical coordinate from a hilbert key encoded in long double.
+//  *
+//  * Parameters:
+//  *             long double hkey : hilbert key on float128
+//  *             order            : hilbert order (level)
+//  *             dim              : dimension should be 3D here
+//  */
+// inline std::vector<int> get_logical_from_hilbert_3D(long double hkey, int order, int dim){
+
+//     int twotondim = 1 << dim;
+//     std::vector<int> ind_array(dim);
+//     std::vector<int> h_digits(order);
+//     std::vector<std::vector<int>> ind_bits( order, std::vector<int> (dim));
+
+//     // Compute Hilbert key bits
+//     for(int ibit=0; ibit<order; ++ibit){
+//         h_digits[ibit] = int( static_cast<long int>((hkey / std::pow(twotondim, ibit))) % twotondim);
+//         hkey -= h_digits[ibit] * std::pow(twotondim, ibit);
+//     }
+
+//     // Compute indices bits
+//     int cur_state = 0, new_state, s_digit = 0;
+//     for(int ibit=order-1; ibit>-1; --ibit) {
+
+//         // Compute the new s_digit from the state diagram
+//         new_state = _3D_STATE_r[h_digits[ibit]][0][cur_state];
+//         s_digit   = _3D_STATE_r[h_digits[ibit]][1][cur_state];
+//         cur_state = new_state;
+
+//         // Compute ind_bitd
+//         for (int idim = 0; idim < dim; ++idim) {
+//             ind_bits[ibit][idim] = (s_digit >> (dim - 1 - idim)) & 1;
+//         }
+//     }
+
+//     // Set indices for current key
+//     for(int ibit=0; ibit<order; ++ibit)
+//         for(int idim=0; idim<dim; ++idim)
+//             ind_array[idim] += ind_bits[ibit][idim] << ibit;
+
+//     return ind_array;
+// }
+
+// /*
+//  * Return the logical coordinate from a hilbert key encoded in long double.
+//  *
+//  * Parameters:
+//  *             long double hkey : hilbert key on float128
+//  *             order            : hilbert order (level)
+//  *             dim              : dimension should be 3D here
+//  */
+// inline
+// Logical_Pos_t getLogicalFromHilberKey3D( uint64_t hkey, int level ){
+
+//     constexpr int dim = 3;
+
+//     int twotondim = 1 << dim;
+//     uint32_t ind_array[dim] = {0, 0, 0};
+//     int h_digits[level];
+//     int ind_bits[level][dim];
+
+//     // Compute Hilbert key bits
+//     for(int ibit=0; ibit<level; ++ibit){
+//         h_digits[ibit] = int( static_cast<long int>( (hkey / std::pow( twotondim, ibit )) ) % twotondim);
+//         hkey -= h_digits[ibit] * std::pow(twotondim, ibit);
+//     }
+
+//     // Compute indices bits
+//     int cur_state = 0, new_state, s_digit = 0;
+//     for(int ibit=level-1; ibit>-1; --ibit) {
+
+//         // Compute the new s_digit from the state diagram
+//         new_state = _HILBERT_3D_STATE_r[h_digits[ibit]][0][cur_state];
+//         s_digit = _HILBERT_3D_STATE_r[h_digits[ibit]][1][cur_state];
+//         cur_state = new_state;
+
+//         // Compute ind_bitd
+//         for (int idim = 0; idim < dim; ++idim) {
+//             ind_bits[ibit][idim] = (s_digit >> (dim - 1 - idim)) & 1;
+//         }
+//     }
+
+//     // Set indices for current key
+//     for(int ibit=0; ibit<level; ++ibit)
+//         for(int idim=0; idim<dim; ++idim)
+//             ind_array[idim] += ind_bits[ibit][idim] << ibit;
+
+//     return Logical_Pos_t { ind_array[0], ind_array[1], ind_array[2] };
+// }
\ No newline at end of file
diff --git a/include/samurai/level_cell_array.hpp b/include/samurai/level_cell_array.hpp
index 9b62a9179..a69baa50b 100644
--- a/include/samurai/level_cell_array.hpp
+++ b/include/samurai/level_cell_array.hpp
@@ -119,6 +119,14 @@ namespace samurai
         /// Display to the given stream
         void to_stream(std::ostream& os) const;
 
+        // get_interval_location
+        template <typename... T, typename = std::enable_if_t<std::conjunction_v<std::is_convertible<T, value_t>...>, void>>
+        std::size_t get_interval_location(const interval_t& interval, T... index) const;
+        template <class E>
+        std::size_t get_interval_location(const interval_t& interval, const xt::xexpression<E>& index) const;
+        template <class E>
+        std::size_t get_interval_location(const xt::xexpression<E>& coord) const;
+
         // get_interval
         template <typename... T, typename = std::enable_if_t<std::conjunction_v<std::is_convertible<T, value_t>...>, void>>
         const interval_t& get_interval(const interval_t& interval, T... index) const;
@@ -151,6 +159,9 @@ namespace samurai
         //// Gives the total number of intervals
         auto nb_intervals() const;
 
+        //// Gives the number of intervals for a given dimension
+        std::size_t nb_intervals(std::size_t d) const;
+
         //// Gives the number of cells
         std::size_t nb_cells() const;
 
@@ -514,6 +525,37 @@ namespace samurai
         return const_reverse_iterator(cbegin());
     }
 
+    /**
+     * Return the index in the x-interval array satisfying the input parameters
+     *
+     * @param interval The desired x-interval.
+     * @param index The desired indices for the other dimensions.
+     */
+    template <std::size_t Dim, class TInterval>
+    template <typename... T, typename D>
+    inline std::size_t LevelCellArray<Dim, TInterval>::get_interval_location(const interval_t& interval, T... index) const
+    {
+        return static_cast<std::size_t>(find(*this, {interval.start, index...}));
+    }
+
+    template <std::size_t Dim, class TInterval>
+    template <class E>
+    inline std::size_t LevelCellArray<Dim, TInterval>::get_interval_location(const interval_t& interval, const xt::xexpression<E>& index) const
+    {
+        xt::xtensor_fixed<value_t, xt::xshape<dim>> point;
+        point[0]                         = interval.start;
+        xt::view(point, xt::range(1, _)) = index;
+        return static_cast<std::size_t>(find(*this, point));
+    }
+
+    template <std::size_t Dim, class TInterval>
+    template <class E>
+    inline std::size_t LevelCellArray<Dim, TInterval>::get_interval_location(const xt::xexpression<E>& coord) const
+    {
+        xt::xtensor_fixed<value_t, xt::xshape<dim>> coord_array = coord;
+        return static_cast<std::size_t>(find(*this, coord_array));
+    }
+
     /**
      * Return the x-interval satisfying the input parameters
      *
@@ -640,6 +682,13 @@ namespace samurai
         return s;
     }
 
+    template <std::size_t Dim, class TInterval>
+    inline std::size_t LevelCellArray<Dim, TInterval>::nb_intervals(std::size_t d) const
+    {
+        assert(d < dim);
+        return m_cells[d].size();
+    }
+
     template <std::size_t Dim, class TInterval>
     inline std::size_t LevelCellArray<Dim, TInterval>::nb_cells() const
     {
diff --git a/include/samurai/load_balancing.hpp b/include/samurai/load_balancing.hpp
new file mode 100644
index 000000000..3c06ed3f5
--- /dev/null
+++ b/include/samurai/load_balancing.hpp
@@ -0,0 +1,1910 @@
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+#include "algorithm.hpp"
+#include "algorithm/utils.hpp"
+#include "hilbert.hpp"
+#include "mesh.hpp"
+#include "morton.hpp"
+#include "mr/mesh.hpp"
+
+// statistics
+#ifdef WITH_STATS
+#include <nlohmann/json.hpp>
+#endif
+
+#ifdef SAMURAI_WITH_MPI
+#include <boost/mpi.hpp>
+#endif
+
+
+#ifdef SAMURAI_WITH_MPI
+namespace samurai
+{
+
+    // namespace load_balance
+    // {
+
+    //     template <class Mesh_t, class CellArray_t>
+    //     Mesh_t merge(Mesh_t& mesh, const CellArray_t& lca)
+    //     {
+    //         using cl_type = typename Mesh_t::cl_type;
+
+    //         auto& refmesh = mesh[Mesh_t::mesh_id_t::cells];
+
+    //         auto minlevel = std::min(refmesh.min_level(), lca.min_level());
+    //         auto maxlevel = std::max(refmesh.max_level(), lca.max_level());
+
+    //         cl_type cl;
+    //         for (size_t ilvl = minlevel; ilvl <= maxlevel; ++ilvl)
+    //         {
+    //             auto un = samurai::union_(refmesh[ilvl], lca[ilvl]);
+
+    //             un(
+    //                 [&](auto& interval, auto& indices)
+    //                 {
+    //                     cl[ilvl][indices].add_interval(interval);
+    //                 });
+    //         }
+
+    //         return Mesh_t(cl, minlevel, maxlevel);
+    //     }
+
+    //     template <class Mesh_t, class CellArray_t>
+    //     Mesh_t remove(Mesh_t& mesh, CellArray_t& lca)
+    //     {
+    //         using cl_type = typename Mesh_t::cl_type;
+
+    //         auto& refmesh = mesh[Mesh_t::mesh_id_t::cells];
+
+    //         auto minlevel = std::min(refmesh.min_level(), lca.min_level());
+    //         auto maxlevel = std::max(refmesh.max_level(), lca.max_level());
+
+    //         // remove cells
+    //         cl_type cl;
+    //         size_t diff_ncells = 0;
+    //         for (size_t ilvl = minlevel; ilvl <= maxlevel; ++ilvl)
+    //         {
+    //             auto diff = samurai::difference(refmesh[ilvl], lca[ilvl]);
+
+    //             diff(
+    //                 [&](auto& interval, auto& index)
+    //                 {
+    //                     cl[ilvl][index].add_interval(interval);
+    //                     diff_ncells += interval.size();
+    //                 });
+    //         }
+
+    //         // new mesh for current process
+    //         return Mesh_t(cl, minlevel, maxlevel);
+    //     }
+
+    // }
+
+    struct MPI_Load_Balance
+    {
+        int32_t _load;
+        std::vector<int> neighbour;
+        std::vector<int32_t> load;
+        std::vector<int32_t> fluxes;
+    };
+
+    enum Distance_t
+    {
+        L1,
+        L2,
+        LINF,
+        GRAVITY
+    };
+
+    enum Direction_t
+    {
+        FACE,
+        DIAG,
+        FACE_AND_DIAG
+    };
+
+    enum BalanceElement_t
+    {
+        CELL,
+        INTERVAL
+    };
+
+    enum Weight{
+        OnSmall,
+        OnLarge,
+        None
+    };
+
+    static const double load_balancing_threshold = 0.01; // 0.03141592; // 2.5 %
+    // static const std::vector<double> load_balancing_cell_weight = { 1., 0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 
+    //                                                                 0.0078125, 0.00390625, 0.001953125, 0.0009765625,
+    //                                                                 0.00048828125, 0.000244140625, 0.0001220703125 };
+
+    /**
+     * Compute distance base on different norm.
+     */
+
+    template <size_t dim, class Coord_t>
+    static inline double distance_l2(const Coord_t& d1, const Coord_t& d2)
+    {
+        double dist = 0.;
+        for (size_t idim = 0; idim < dim; ++idim)
+        {
+            double d = d1(idim) - d2(idim);
+            dist += d * d;
+        }
+        return std::sqrt(dist);
+    }
+
+    template <size_t dim, class Coord_t>
+    static inline double distance_inf(const Coord_t& d1, const Coord_t& d2)
+    {
+        double dist = 0.;
+        for (size_t idim = 0; idim <dim; ++idim)
+        {
+            dist = std::max(std::abs(d1(idim) - d2(idim)), dist);
+        }
+        return dist;
+    }
+
+    template <size_t dim, class Coord_t>
+    static inline double distance_l1(const Coord_t& d1, const Coord_t& d2)
+    {
+        double dist = 0.;
+        for (size_t idim = 0; idim < dim; ++idim)
+        {
+            dist += std::abs(d1(idim) - d2(idim));
+        }
+        return dist;
+    }
+
+    /**
+     * Compute the load of the current process based on intervals or cells. It uses the
+     * mesh_id_t::cells to only consider leaves.
+     */
+    template <BalanceElement_t elem, class Mesh_t>
+    static std::size_t cmptLoad(const Mesh_t& mesh)
+    {
+        using mesh_id_t = typename Mesh_t::mesh_id_t;
+
+        const auto& current_mesh = mesh[mesh_id_t::cells];
+
+        std::size_t current_process_load = 0;
+
+        if constexpr (elem == BalanceElement_t::CELL)
+        {
+            // cell-based load without weight.
+            samurai::for_each_interval(current_mesh,
+                                       [&]([[maybe_unused]] std::size_t level, const auto& interval, [[maybe_unused]] const auto& index)
+                                       {
+                                           current_process_load += interval.size(); // * load_balancing_cell_weight[ level ];
+                                       });
+        }
+        else
+        {
+            // interval-based load without weight
+            for (std::size_t level = current_mesh.min_level(); level <= current_mesh.max_level(); ++level)
+            {
+                current_process_load += current_mesh[level].shape()[0]; // only in x-axis ;
+            }
+        }
+
+        return current_process_load;
+    }
+
+    /**
+     * Compute fluxes based on load computing stategy based on graph with label
+     * propagation algorithm. Return, for the current process, the flux in term of
+     * load, i.e. the quantity of "load" to transfer to its neighbours. If the load
+     * is negative, it means that the process (current) must send load to neighbour,
+     * if positive it means that it must receive load.
+     *
+     * This function use 2 MPI all_gather calls.
+     *
+     */
+    template <BalanceElement_t elem, class Mesh_t>
+    std::vector<int> cmptFluxes( Mesh_t& mesh, const std::vector<int> & neighbourhood, int niterations )
+    {
+
+        boost::mpi::communicator world;
+
+        std::ofstream logs; 
+        logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app );
+
+        size_t n_neighbours = neighbourhood.size();
+
+        // load of current process
+        int my_load = static_cast<int>( cmptLoad<elem>( mesh ) );
+        logs << "> Current process load : " << my_load << std::endl;
+
+        // fluxes between processes
+        std::vector<int> fluxes(n_neighbours, 0);
+
+        // load of each process (all processes not only neighbours)
+        std::vector<int> loads;
+
+        // numbers of neighbours processes for each process, used for weighting fluxes
+        std::vector<size_t> neighbourhood_n_neighbours;
+
+        // number of neighbours for each process
+        boost::mpi::all_gather(world, neighbourhood.size(), neighbourhood_n_neighbours);
+
+        // get "my_load" from other processes
+        int nt = 0;
+        while( nt < niterations ){
+            boost::mpi::all_gather(world, my_load, loads);
+
+            // compute updated my_load for current process based on its neighbourhood
+            int my_load_new = my_load;
+            for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i)
+            {
+                std::size_t neighbour_rank = static_cast<std::size_t>( neighbourhood[ n_i ] );
+                int neighbour_load         = loads[ neighbour_rank ];
+                double diff_load           = static_cast<double>( neighbour_load - my_load_new);
+
+                std::size_t nb_neighbours_neighbour = neighbourhood_n_neighbours[ neighbour_rank ];
+
+                double weight = 1. / static_cast<double>( std::max( n_neighbours, nb_neighbours_neighbour ) + 1 );
+
+                // if transferLoad < 0 -> need to send data, if transferLoad > 0 need to receive data
+                int transfertLoad = static_cast<int>( std::lround( weight * diff_load )) ;
+
+                fluxes[ n_i ] += transfertLoad;
+
+                // my_load_new += transfertLoad;
+
+                my_load += transfertLoad;
+            }
+
+            logs << fmt::format("\t> it {}, neighbours : ", nt) ;
+            for( size_t in=0; in<neighbourhood.size(); ++in )
+                logs << neighbourhood[ in ] << "[" << loads[ static_cast<size_t>( neighbourhood[ in ] )  ] << "], ";
+            logs << std::endl << "\t\t>fluxes : ";
+            for( size_t in=0; in<neighbourhood.size(); ++in )
+                logs << fluxes[ in ] << ", ";
+            logs << std::endl;
+            logs << "New theoretical load : " << my_load << std::endl;
+
+            nt ++ ;
+        }
+
+        // apply threshold, if the difference is smaller than #load_balancing_threshold of the number of cells,
+        // we do not load balance those processes
+        for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i)
+        {
+            std::size_t neighbour_rank = static_cast<std::size_t>( neighbourhood[ n_i ] );
+            int abs_diff               = std::abs( fluxes[ n_i ] );
+            int threshold_neigh        = static_cast<int>( load_balancing_threshold * loads[ neighbour_rank ] );
+            int threshold_curr         = static_cast<int>( load_balancing_threshold * my_load ); 
+
+            if( abs_diff < threshold_curr && abs_diff < threshold_neigh ){
+                fluxes[ n_i ] = 0;
+            }
+
+        }
+
+        return fluxes;
+    }
+
+    /**
+     * Compute fluxes based on load computing stategy based on graph with label
+     * propagation algorithm. Return, for the current process, the flux in term of
+     * load, i.e. the quantity of "load" to transfer to its neighbours. If the load
+     * is negative, it means that the process (current) must send load to neighbour,
+     * if positive it means that it must receive load.
+     *
+     * This function use 2 MPI all_gather calls.
+     *
+     */
+    template <BalanceElement_t elem, class Mesh_t>
+    std::vector<int> cmptFluxes(Mesh_t& mesh, int niterations)
+    {
+        using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+
+        boost::mpi::communicator world;
+
+        std::ofstream logs; 
+        logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app );
+
+        // give access to geometricaly neighbour process rank and mesh
+        std::vector<mpi_subdomain_t>& neighbourhood = mesh.mpi_neighbourhood();
+        size_t n_neighbours                         = neighbourhood.size();
+
+        // load of current process
+        int my_load = static_cast<int>(cmptLoad<elem>(mesh));
+
+        logs << "> Current process load : " << my_load << std::endl;
+
+        // fluxes between processes
+        std::vector<int> fluxes(n_neighbours, 0);
+
+        // load of each process (all processes not only neighbours)
+        std::vector<int> loads;
+
+        // numbers of neighbours processes for each process, used for weighting fluxes
+        std::vector<size_t> neighbourhood_n_neighbours;
+
+        // number of neighbours for each process
+        boost::mpi::all_gather(world, neighbourhood.size(), neighbourhood_n_neighbours);
+
+        // get "my_load" from other processes
+        int nt = 0;
+        while( nt < niterations ){
+            boost::mpi::all_gather(world, my_load, loads);
+
+            // compute updated my_load for current process based on its neighbourhood
+            int my_load_new = my_load;
+            for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i)
+            {
+                std::size_t neighbour_rank = static_cast<std::size_t>(neighbourhood[n_i].rank);
+                int neighbour_load         = loads[neighbour_rank];
+                double diff_load           = static_cast<double>( neighbour_load - my_load_new);
+
+                std::size_t nb_neighbours_neighbour = neighbourhood_n_neighbours[neighbour_rank];
+
+                double weight = 1. / static_cast<double>(std::max(n_neighbours, nb_neighbours_neighbour) + 1);
+
+                // if transferLoad < 0 -> need to send data, if transferLoad > 0 need to receive data
+                int transfertLoad = static_cast<int>(std::lround(weight * diff_load));
+
+                fluxes[n_i] += transfertLoad;
+
+                // my_load_new += transfertLoad;
+
+                my_load += transfertLoad;
+            }
+
+            logs << fmt::format("\t> it {}, neighbours : ", nt) ;
+            for( size_t in=0; in<neighbourhood.size(); ++in )
+                logs << neighbourhood[ in ].rank << "[" << loads[ static_cast<size_t>( neighbourhood[ in ].rank ) ] << "], ";
+            logs << std::endl << "\t> fluxes : ";
+            for( size_t in=0; in<neighbourhood.size(); ++in )
+                logs << fluxes[ in ] << ", ";
+            logs << std::endl;
+            logs << "\t> New theoretical load : " << my_load << std::endl;
+
+            nt ++ ;
+        }
+
+        // apply threshold, if the difference is smaller than #load_balancing_threshold of the number of cells,
+        // we do not load balance those processes
+        for (std::size_t n_i = 0; n_i < n_neighbours; ++n_i)
+        {
+            std::size_t neighbour_rank = static_cast<std::size_t>( neighbourhood[ n_i ].rank );
+            int abs_diff               = std::abs( fluxes[ n_i ] );
+            int threshold_neigh        = static_cast<int>( load_balancing_threshold * loads[ neighbour_rank ] );
+            int threshold_curr         = static_cast<int>( load_balancing_threshold * my_load ); 
+
+            if( abs_diff < threshold_curr && abs_diff < threshold_neigh ){
+                fluxes[ n_i ] = 0;
+            }
+
+        }
+
+        return fluxes;
+    }
+
+    /** Is gravity the key ? That would be awesome =)
+     * This is not a distance but well does not change anything to the algorithm
+     * - G m m' / R ?
+     *
+     * What should be m ? m' ? Let's G = 1.
+     *
+     **/
+    template <size_t dim, class Coord_t>
+    static inline double gravity(const Coord_t& d1, const Coord_t& d2)
+    {
+        double dist = distance_l2<dim>(d1, d2);
+
+        // makes high level ( smallest ) cells to be exchanged more easily. 10 here is max
+        // level, hardcoded for tests.
+        // double m = 1./ static_cast<double>( 1 << ( 10 - d1.level ) ) , m_p = 1.;
+
+        // makes bigger cells to be exchanged more easily
+        // double m = 1./ static_cast<double>( 1 << d1.level ) , m_p = 1.;
+        constexpr double G = 1.;
+        double m = 1., m_p = 1.;
+        double f = -G * m * m_p / (dist * dist);
+
+        return f;
+    }
+
+    // we are using Cell_t to allow ponderation using level;
+    template <size_t dim, Distance_t dist, class Coord_t>
+    inline constexpr double getDistance(const Coord_t& cc, const Coord_t& d)
+    {
+        static_assert(dim == 2 || dim == 3);
+        if constexpr (dist == Distance_t::L1)
+        {
+            return distance_l1<dim>(cc, d);
+        }
+        else if constexpr (dist == Distance_t::L2)
+        {
+            return distance_l2<dim>(cc, d);
+        }
+        else if constexpr (dist == Distance_t::LINF)
+        {
+            return distance_inf<dim>(cc, d);
+        }
+        else if constexpr (dist == Distance_t::GRAVITY)
+        {
+            return gravity<dim>(cc, d);
+        }
+    }
+
+    template<class Mesh_t, class Id_t, class Coord_t>
+    bool cellExists( const Mesh_t & mesh, Id_t mesh_id, std::size_t level, const Coord_t & lo ){
+        using CellList_t  = typename Mesh_t::cl_type;
+        using CellArray_t = typename Mesh_t::ca_type;
+
+        CellList_t tmp;
+        if constexpr( Mesh_t::dim == 2 ) { tmp[ level ][ { lo( 1 ) } ].add_point( lo( 0 ) ); }
+        if constexpr( Mesh_t::dim == 3 ) { tmp[ level ][ { lo( 1 ), lo( 2 ) } ].add_point( lo( 0 ) ); }
+
+        CellArray_t tmp_ = { tmp, false };
+        auto set = intersection( mesh[ mesh_id ][level], tmp_[level]);
+
+        size_t ninterval = 0;
+        set( [&]([[maybe_unused]]const auto& i, [[maybe_unused]]const auto& index) {
+            ninterval ++ ;
+        });
+
+        return ninterval > 0 ? true : false ;
+    }
+
+    template <class Flavor>
+    class LoadBalancer
+    {
+      private:
+
+      public:
+        std::ofstream logs;
+        int nloadbalancing;
+
+        template <class Mesh_t, class Field_t>
+        void update_field(Mesh_t& new_mesh, Field_t& field)
+        {
+            using mesh_id_t = typename Mesh_t::mesh_id_t;
+            using value_t   = typename Field_t::value_type;
+            boost::mpi::communicator world;
+
+            logs << fmt::format("\n# [LoadBalancer]::update_field rank # {} -> '{}' ", world.rank(), field.name() ) << std::endl;
+
+            Field_t new_field("new_f", new_mesh);
+            new_field.fill(0);
+
+            auto & old_mesh = field.mesh();
+
+            // auto min_level = boost::mpi::all_reduce(world, mesh[mesh_id_t::cells].min_level(), boost::mpi::minimum<std::size_t>());
+            // auto max_level = boost::mpi::all_reduce(world, mesh[mesh_id_t::cells].max_level(), boost::mpi::maximum<std::size_t>());
+
+            auto min_level = old_mesh.min_level();
+            auto max_level = old_mesh.max_level();
+
+            // copy data of intervals that are didn't move
+            for (std::size_t level = min_level; level <= max_level; ++level)
+            {
+                auto intersect_old_new = intersection(old_mesh[mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]);
+                intersect_old_new.apply_op( samurai::copy( new_field, field ) );
+            }
+
+            std::vector<boost::mpi::request> req;
+            std::vector<std::vector<value_t>> to_send( static_cast<size_t>( world.size() ) );
+
+            // FIXME: this is overkill and will not scale
+            std::vector<Mesh_t> all_new_meshes, all_old_meshes;
+            boost::mpi::all_gather( world, new_mesh, all_new_meshes );
+            boost::mpi::all_gather( world, field.mesh(), all_old_meshes );
+
+            // build payload of field that has been sent to neighbour, so compare old mesh with new neighbour mesh 
+            // for (auto& neighbour : new_mesh.mpi_neighbourhood())
+            for( size_t ni=0; ni<all_new_meshes.size(); ++ni )
+            {
+                if( static_cast<int>( ni ) == world.rank() ) continue;
+
+                // auto & neighbour_new_mesh = neighbour.mesh;
+                auto & neighbour_new_mesh = all_new_meshes[ ni ];
+
+                for (std::size_t level = min_level; level <= max_level; ++level)
+                {
+                    if (!old_mesh[mesh_id_t::cells][level].empty() && !neighbour_new_mesh[mesh_id_t::cells][level].empty())
+                    {
+                        auto intersect_old_mesh_new_neigh = intersection( old_mesh[mesh_id_t::cells][level], neighbour_new_mesh[mesh_id_t::cells][level] );
+                        intersect_old_mesh_new_neigh(
+                            [&](const auto & interval, const auto & index)
+                            {   
+                                std::copy(field(level, interval, index).begin(), field(level, interval, index).end(), std::back_inserter(to_send[ni]));
+                            });
+                    }
+                }
+
+                if (to_send[ni].size() != 0)
+                {
+                    // neighbour_rank = neighbour.rank;
+                    auto neighbour_rank = static_cast<int>( ni );
+                    req.push_back( world.isend( neighbour_rank, neighbour_rank, to_send[ ni ] ) );
+
+                    logs << fmt::format("\t> [LoadBalancer]::update_field send data to rank # {}", neighbour_rank ) << std::endl;
+                }
+            }
+
+            logs << fmt::format("\t> [LoadBalancer]::update_field number of isend request: {}", req.size() ) << std::endl;
+
+            // build payload of field that I need to receive from neighbour, so compare NEW mesh with OLD neighbour mesh 
+            for (size_t ni=0; ni<all_old_meshes.size(); ++ni )
+            {
+                if( static_cast<int>( ni ) == world.rank() ) continue;
+
+                bool isintersect = false;
+                for (std::size_t level = min_level; level <= max_level; ++level)
+                {
+                    if (!new_mesh[mesh_id_t::cells][level].empty() && !all_old_meshes[ ni ][mesh_id_t::cells][level].empty())
+                    {
+                        std::vector<value_t> to_recv;
+
+                        auto in_interface = intersection( all_old_meshes[ ni ][mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]);
+
+                        in_interface(
+                            [&]( [[maybe_unused]]const auto& i, [[maybe_unused]]const auto& index)
+                            {
+                                isintersect = true;
+                            });
+
+                        if (isintersect)
+                        {
+                            break;
+                        }
+                    }
+                }
+
+                if (isintersect)
+                {
+                    std::ptrdiff_t count = 0;
+                    std::vector<value_t> to_recv;
+                    world.recv( static_cast<int>( ni ), world.rank(), to_recv);
+
+                    for (std::size_t level = min_level; level <= max_level; ++level)
+                    {
+                        if (!new_mesh[mesh_id_t::cells][level].empty() && !all_old_meshes[ ni ][mesh_id_t::cells][level].empty())
+                        {
+                            auto in_interface = intersection(all_old_meshes[ ni ][mesh_id_t::cells][level], new_mesh[mesh_id_t::cells][level]);
+
+                            in_interface(
+                                [&](const auto& i, const auto& index)
+                                {
+                                    std::copy(to_recv.begin() + count,
+                                              to_recv.begin() + count + static_cast<ptrdiff_t>(i.size()*field.size),
+                                              new_field(level, i, index).begin());
+                                    count += static_cast<ptrdiff_t>(i.size()*field.size);
+
+                                    // std::cerr << fmt::format("Process {}, recv interval {}", world.rank(), i) << std::endl;
+                                });
+                        }
+                    }
+                }
+            }
+
+            if (!req.empty())
+            {
+                mpi::wait_all(req.begin(), req.end());
+            }
+
+            std::swap(field.array(), new_field.array());
+        }
+
+        template <class Mesh_t, class Field_t, class... Fields_t>
+        void update_fields(Mesh_t& new_mesh, Field_t& field, Fields_t&... kw)
+
+        {
+            update_field(new_mesh, field);
+            update_fields(new_mesh, kw...);
+        }
+
+        template <class Mesh_t>
+        void update_fields([[maybe_unused]]Mesh_t& new_mesh)
+        {
+        }
+
+      public:
+
+        LoadBalancer() {
+            boost::mpi::communicator world;
+            logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app );
+            nloadbalancing = 0;
+        }
+
+        ~LoadBalancer() {
+            logs.close();
+        }
+
+        /**
+        * This function reorder cells across MPI processes based on a
+        * Space Filling Curve. This is mandatory for load balancing using SFC.
+        */
+        template <class Mesh_t, class Field_t, class... Fields>
+        void reordering( Mesh_t & mesh, Field_t& field, Fields&... kw )
+        {
+            // new reordered mesh on current process + MPI exchange with others
+            // auto new_mesh = static_cast<Flavor*>(this)->reordering_impl( mesh );
+            logs << "\t> Computing reordering flags ... " << std::endl;
+            auto flags = static_cast<Flavor*>(this)->reordering_impl( mesh );
+
+            logs << "\t> Update mesh based on flags ... " << std::endl;
+            auto new_mesh = update_mesh( mesh, flags );
+
+            // update each physical field on the new reordered mesh
+            SAMURAI_TRACE("[Reordering::load_balance]::Updating fields ... ");
+            logs << "\t> Update fields based on flags ... " << std::endl;
+            update_fields( new_mesh, field, kw... );
+
+            // swap mesh reference.
+            // FIX: this is not clean
+            SAMURAI_TRACE("[Reordering::load_balance]::Swapping meshes ... ");
+            field.mesh().swap( new_mesh );
+
+            // discover neighbours: add new neighbours if a new interface appears or remove old neighbours
+            discover_neighbour( field.mesh() );
+            discover_neighbour( field.mesh() );
+
+        }
+
+        template<class Mesh_t, class Field_t>
+        Mesh_t update_mesh( Mesh_t & mesh, const Field_t & flags ) {
+
+            using CellList_t  = typename Mesh_t::cl_type;
+            using CellArray_t = typename Mesh_t::ca_type;
+
+            boost::mpi::communicator world;
+
+            CellList_t new_cl;
+            std::vector<CellList_t> payload( static_cast<size_t>( world.size() ) );
+            std::vector<size_t> payload_size( static_cast<size_t>( world.size()), 0 );
+
+            std::map<int, bool> comm;
+
+            // build cell list for the current process && cells lists of cells for other processes
+            samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&]( const auto & cell ){
+            
+                if( flags[ cell ] == world.rank() ){
+                    if constexpr ( Mesh_t::dim == 1 ){ new_cl[ cell.level ][ {} ].add_point( cell.indices[ 0 ] ); }
+                    if constexpr ( Mesh_t::dim == 2 ){ new_cl[ cell.level ][ { cell.indices[ 1 ] } ].add_point( cell.indices[ 0 ] ); }
+                    if constexpr ( Mesh_t::dim == 3 ){ new_cl[ cell.level ][ { cell.indices[ 1 ], cell.indices[ 2 ] } ].add_point( cell.indices[ 0 ] ); }                        
+                }else{
+                    assert( static_cast<size_t>( flags[ cell ] ) < payload.size() );
+
+                    if( comm.find( flags[ cell ] ) == comm.end() ) {
+                        comm[ flags[ cell ] ] = true;
+                    }
+
+                    if constexpr ( Mesh_t::dim == 1 ){ payload[ static_cast<size_t>( flags[ cell ] ) ][ cell.level ][ {} ].add_point( cell.indices[ 0 ] ); }
+                    if constexpr ( Mesh_t::dim == 2 ){ payload[ static_cast<size_t>( flags[ cell ] ) ][ cell.level ][ { cell.indices[ 1 ] } ].add_point( cell.indices[ 0 ] ); }
+                    if constexpr ( Mesh_t::dim == 3 ){ payload[ static_cast<size_t>( flags[ cell ] ) ][ cell.level ][ { cell.indices[ 1 ], cell.indices[ 2 ] } ].add_point( cell.indices[ 0 ] ); }
+
+                    payload_size[ static_cast<size_t>( flags[ cell ] ) ] ++;
+                } 
+
+            });
+
+            logs << "\t\t>[Load_balancer::update_mesh] Comm required with processes : [";
+            for( const auto & it : comm )
+                logs << it.first << fmt::format(" ({} cells),", payload_size[ static_cast<size_t>( it.first ) ]);
+            logs << "]" << std::endl;
+
+            std::vector<int> req_send( static_cast<size_t>( world.size() ), 0 ), req_recv( static_cast<size_t>( world.size() ), 0 );
+
+            // Required to know communication pattern
+            for( int iproc=0; iproc<world.size(); ++iproc ){
+                if( iproc == world.rank() ) continue;
+
+                int reqExchg;
+                comm.find( iproc ) != comm.end() ? reqExchg = 1 : reqExchg = 0;
+
+                if( payload[ static_cast<size_t>( iproc ) ].empty() ) reqExchg = 0;
+
+                req_send[ static_cast<size_t>( iproc ) ] = reqExchg;
+
+                world.send( iproc, 42, reqExchg );
+
+            }
+
+            for( int iproc=0; iproc<world.size(); ++iproc ){
+                if( iproc == world.rank() ) continue;
+                world.recv( iproc, 42, req_recv[ static_cast<size_t>( iproc ) ] );            
+            }
+
+            for( int iproc=0; iproc<world.size(); ++iproc ){
+               logs << fmt::format("Proc # {}, req_send : {}, req_recv: {} ", iproc, req_send[ static_cast<size_t>( iproc )  ], req_recv[ static_cast<size_t>( iproc ) ] ) << std::endl;;            
+            }
+
+            std::vector<mpi::request> req;
+
+            // actual data echange between processes that need to exchange data
+            for(int iproc=0; iproc<world.size(); ++iproc){
+                if( iproc == world.rank() ) continue ;
+
+                if( req_send[ static_cast<size_t>( iproc ) ] == 1 ){
+                    CellArray_t to_send = { payload[ static_cast<size_t>( iproc ) ], false };
+
+                    req.push_back( world.isend( iproc, 17, to_send ) );
+
+                    logs << fmt::format("\t> Sending to # {}", iproc ) << std::endl;
+                }
+            }
+
+            for(int iproc=0; iproc<world.size(); ++iproc){
+                if( iproc == world.rank() ) continue ;
+
+                if( req_recv[ static_cast<size_t>( iproc ) ] == 1 ) {
+                    CellArray_t to_rcv;
+                    logs << fmt::format("\t> Recving from # {}", iproc ) << std::endl;
+                    world.recv( iproc, 17, to_rcv );
+                    
+                    samurai::for_each_interval(to_rcv, [&](std::size_t level, const auto & interval, const auto & index ){
+                        new_cl[ level ][ index ].add_interval( interval );
+                    });
+                }
+            }
+
+            boost::mpi::wait_all( req.begin(), req.end() );
+
+            Mesh_t new_mesh( new_cl, mesh );
+
+            return new_mesh;
+        }
+
+        template <class Mesh_t, class Field_t, class... Fields>
+        void load_balance(Mesh_t & mesh, Field_t& field, Fields&... kw)
+        {
+
+            std::string lbn = static_cast<Flavor*>(this)->getName();
+
+            logs << fmt::format("\n###################################################" ) << std::endl;
+            logs << fmt::format("> Load balancing ({}) mesh @ iteration {} ", lbn, nloadbalancing ) << std::endl;
+            logs << fmt::format("###################################################\n") << std::endl;
+
+            auto p = lbn.find( "SFC" );
+            if( nloadbalancing == 0 && p != std::string::npos ) {
+                reordering( mesh , field, kw... );
+            }
+
+            // specific load balancing strategy
+            // auto new_mesh = static_cast<Flavor*>(this)->load_balance_impl( field.mesh() );
+
+            auto flags = static_cast<Flavor*>(this)->load_balance_impl( field.mesh() );
+
+            auto new_mesh = update_mesh( mesh, flags );
+
+            // update each physical field on the new load balanced mesh
+            SAMURAI_TRACE("[LoadBalancer::load_balance]::Updating fields ... ");
+            update_fields( new_mesh, field, kw... );
+
+            // swap mesh reference to new load balanced mesh. FIX: this is not clean
+            SAMURAI_TRACE("[LoadBalancer::load_balance]::Swapping meshes ... ");
+            field.mesh().swap( new_mesh );
+
+            // discover neighbours: add new neighbours if a new interface appears or remove old neighbours
+            // FIX: add boolean return to condition the need of another call, might save some MPI comm.
+            SAMURAI_TRACE("[LoadBalancer::load_balance]::discover neighbours ... ");
+            discover_neighbour( field.mesh() );
+            discover_neighbour( field.mesh() );
+
+            nloadbalancing += 1;
+        }
+
+        /*
+        * Call balancer function that evaluate if load balancing is required or not based on balncer internal strategy
+        *
+        */
+        template<class Mesh_t>
+        bool require_balance( Mesh_t & mesh ) {
+            return static_cast<Flavor*>(this)->require_balance_impl( mesh );
+        }
+
+        /**
+        * This function MUST be used for debug or analysis purposes of load balancing strategy,
+        * it involves a lots of MPI communications.
+        *
+        * Try to evaluate / compute a load balancing score. We expect from a good load 
+        * balancing strategy to:
+        *   - optimize the number of neighbours   (reduce comm.)
+        *   - optimize the number of ghosts cells (reduce comm.)
+        *   - load balance charge between processes
+        *   - optimize the size of interval (samurai specific, expect better perf, better simd)
+        */
+        template <class Mesh>
+        void evaluate_balancing(Mesh& mesh) const
+        {
+            boost::mpi::communicator world;
+
+            if (world.rank() == 0)
+            {
+                SAMURAI_TRACE("[LoadBalancer::evaluate_balancing]::Entering function ... ");
+            }
+
+            std::vector<int> load_cells, load_interval;
+            {
+                int my_load_i = static_cast<int>(cmptLoad<samurai::BalanceElement_t::INTERVAL>(mesh));
+                boost::mpi::all_gather(world, my_load_i, load_interval);
+
+                int my_load_c = static_cast<int>(cmptLoad<samurai::BalanceElement_t::CELL>(mesh));
+                boost::mpi::all_gather(world, my_load_c, load_cells);
+            }
+
+            // if (world.rank() == 0)
+            // {
+            //     std::cerr << "\t> LoadBalancer statistics : " << std::endl;
+
+            //     std::vector<int>::iterator min_load = std::min_element(load_cells.begin(), load_cells.end());
+            //     auto rank                           = std::distance(load_cells.begin(), min_load);
+            //     std::cerr << "\t\t> Min load {" << *min_load << ", cells } @ rank # " << rank << std::endl;
+
+            //     std::vector<int>::iterator max_load = std::max_element(load_cells.begin(), load_cells.end());
+            //     rank                                = std::distance(load_cells.begin(), max_load);
+            //     std::cerr << "\t\t> Max load {" << *max_load << ", cells } @ rank # " << rank << std::endl;
+            // }
+
+            // std::string _stats = fmt::format("statistics_process_{}", world.rank());
+            // samurai::Statistics s(_stats);
+
+            // s("stats", mesh);
+
+            // s( "statistics" , mesh );
+
+            // no need to implement this in derived class, could be general.
+            //
+            // static_cast<Flavor *>(this)->evaluate_impl( mesh, kw... );
+        }
+    };
+
+    /**
+     * Precomputed direction to face-to-face element for both 3D and 2D
+     */
+    template <size_t dim>
+    constexpr auto getDirectionFace()
+    {
+        using base_stencil = xt::xtensor_fixed<int, xt::xshape<dim>>;
+
+        constexpr size_t size_ = 2 * dim;
+        xt::xtensor_fixed<base_stencil, xt::xshape< size_ >> stencils;
+        std::size_t nstencils = size_;
+        for (std::size_t ist = 0; ist < nstencils; ++ist)
+        {
+            stencils[ist].fill(0);
+            stencils[ist][ist / 2] = 1 - (ist % 2) * 2;
+        }
+        // if constexpr(  dim == 2 ){
+        //     return xt::xtensor_fixed<base_stencil, xt::xshape<4>> {{{1, 0}, {-1, 0}, {0, 1}, {0, -1}}};
+        // }
+
+        return stencils;
+    }
+
+    /**
+     * Precomputed direction to diagonal element for both 3D and 2D cases.
+     */
+    template <size_t dim>
+    constexpr auto getDirectionDiag()
+    {
+        using base_stencil = xt::xtensor_fixed<int, xt::xshape<dim>>;
+
+        static_assert(dim == 2 || dim == 3);
+
+        if constexpr (dim == 2)
+        {
+            return xt::xtensor_fixed<base_stencil, xt::xshape<4>>{
+                {{1, 1}, {1, -1}, {-1, 1}, {-1, -1}}
+            };
+        }
+
+        if constexpr (dim == 3)
+        {
+            return xt::xtensor_fixed<base_stencil, xt::xshape<20>>{
+                {{1, 1, -1},  {1, -1, -1}, {-1, 1, -1}, {-1, -1, -1}, {1, 0, -1},  {-1, 0, -1}, {0, 1, -1},
+                 {0, -1, -1}, {1, 1, 0},   {1, -1, 0},  {-1, 1, 0},   {-1, -1, 0}, {1, 1, 1},   {1, -1, 1},
+                 {-1, 1, 1},  {-1, -1, 1}, {1, 0, 1},   {-1, 0, 1},   {0, 1, 1},   {0, -1, 1}}
+            };
+        }
+    }
+
+    /**
+     * Precompute direction to element in all direction face + diagonals: 26 in 3D, 8 in 2D
+     */
+    template <size_t dim>
+    constexpr auto getDirectionFaceAndDiag()
+    {
+        using base_stencil = xt::xtensor_fixed<int, xt::xshape<dim>>;
+
+        static_assert(dim == 2 || dim == 3);
+
+        if constexpr (dim == 2)
+        {
+            return xt::xtensor_fixed<base_stencil, xt::xshape<8>>{
+                {{-1, 0}, {1, 0}, {1, 1}, {1, -1}, {0, -1}, {0, 1}, {-1, 1}, {-1, -1}}
+            };
+        }
+
+        if constexpr (dim == 3)
+        {
+            return xt::xtensor_fixed<base_stencil, xt::xshape<26>>{
+                {{1, 0, 0},    {-1, 0, 0}, {0, 1, 0},   {0, -1, 0},  {0, 0, 1},   {0, 0, -1}, {1, 1, -1}, {1, -1, -1}, {-1, 1, -1},
+                 {-1, -1, -1}, {1, 0, -1}, {-1, 0, -1}, {0, 1, -1},  {0, -1, -1}, {1, 1, 0},  {1, -1, 0}, {-1, 1, 0},  {-1, -1, 0},
+                 {1, 1, 1},    {1, -1, 1}, {-1, 1, 1},  {-1, -1, 1}, {1, 0, 1},   {-1, 0, 1}, {0, 1, 1},  {0, -1, 1}}
+            };
+        }
+    }
+
+    template <int dim, Direction_t dir>
+    inline auto getDirection()
+    {
+        static_assert(dim == 2 || dim == 3);
+        if constexpr (dir == Direction_t::FACE)
+        {
+            return getDirectionFace<dim>();
+        }
+        else if constexpr (dir == Direction_t::DIAG)
+        {
+            return getDirectionDiag<dim>();
+        }
+        else if constexpr (dir == Direction_t::FACE_AND_DIAG)
+        {
+            return getDirectionFaceAndDiag<dim>();
+        }
+    }
+
+    /**
+     * Compute the barycenter of the mesh given in parameter.
+     *
+     * Feat: adjust wght which is hardcoded to 1. for now.
+     *       by passing in parameters an array for example to modulate
+     *       weight according to level
+     */
+    template <size_t dim, Weight w=Weight::None, class Mesh_t>
+    xt::xtensor_fixed<double, xt::xshape<dim>> _cmpCellBarycenter(Mesh_t& mesh)
+    {
+        using Coord_t = xt::xtensor_fixed<double, xt::xshape<dim>>;
+
+        Coord_t bary;
+        bary.fill(0.);
+
+        double wght_tot = 0.;
+        samurai::for_each_cell(mesh,
+                               [&](auto& cell)
+                               {
+                                   double wght = 1.;
+                                   
+                                   if constexpr( w == Weight::OnSmall ){
+                                    wght = 1. / static_cast<double>( 1 << (mesh.max_level() - cell.level) );
+                                   }
+
+                                   if constexpr( w == Weight::OnLarge ){
+                                    wght = 1. / static_cast<double>( 1 << cell.level );
+                                   }
+
+                                   auto cc = cell.center();
+
+                                   for (size_t idim = 0; idim < dim; ++idim)
+                                   {
+                                       bary(idim) += cc(idim) * wght;
+                                   }
+
+                                   wght_tot += wght;
+                               });
+
+        wght_tot = std::max(wght_tot, 1e-12);
+        for (size_t idim = 0; idim < dim; ++idim)
+        {
+            bary(idim) /= wght_tot;
+        }
+
+        return bary;
+    }
+
+
+
+    /**
+     *
+     * Params:
+     *          leveldiff : max level difference. For 2:1 balance, leveldiff = 1
+     */
+    template <size_t dim, Direction_t dir, size_t leveldiff = 1, class Mesh_t>
+    static auto cmptInterface(const Mesh_t& mesh, const Mesh_t& omesh)
+    {
+        using CellList_t  = typename Mesh_t::cl_type;
+        using CellArray_t = typename Mesh_t::ca_type;
+        using mesh_id_t   = typename Mesh_t::mesh_id_t;
+
+        CellList_t interface;
+
+        // operation are on leaves only
+        auto& currentMesh = mesh[mesh_id_t::cells];
+        auto& otherMesh   = omesh[mesh_id_t::cells];
+
+        // direction for translation to cmpt interface
+        auto dirs = getDirection<dim, dir>();
+
+        for (size_t ist = 0; ist < dirs.size(); ++ist)
+        {
+            const auto& stencil = dirs[ist];
+
+            size_t minlevel = otherMesh.min_level();
+            size_t maxlevel = otherMesh.max_level();
+
+            for (size_t level = minlevel; level <= maxlevel; ++level)
+            {
+                // for each level we need to check level -1 / 0 / +1
+                std::size_t minlevel_check = static_cast<std::size_t>(
+                    std::max(static_cast<int>(currentMesh.min_level()), static_cast<int>(level) - static_cast<int>(leveldiff)));
+                std::size_t maxlevel_check = std::min(currentMesh.max_level(), level + leveldiff);
+
+                for (size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel)
+                {
+                    // translate neighbour from dir (hopefully to current) all direction are tested
+                    auto set       = translate(otherMesh[level], stencil);
+                    auto intersect = intersection(set, currentMesh[projlevel]).on(projlevel);
+
+                    size_t nbInter_ = 0, nbCells = 0;
+                    intersect(
+                        [&](auto& interval, [[maybe_unused]] auto& index)
+                        {
+                            nbInter_ += 1;
+                            nbCells += interval.size();
+                        });
+
+                    // we get more interval / cells, than wanted because neighbour has bigger cells
+                    // 2:1 balance required here if not, need another loop...
+                    if (nbInter_ > 0 && projlevel > level)
+                    {
+                        static_assert(leveldiff == 1); // for now at least
+                        auto set_  = translate(currentMesh[projlevel], stencil);
+                        auto diff_ = difference(intersect, set_);
+
+                        diff_(
+                            [&](auto& interval, auto& index)
+                            {
+                                interface[projlevel][index].add_interval(interval);
+                            });
+                    }
+                    else
+                    {
+                        if (nbInter_ > 0)
+                        {
+                            intersect(
+                                [&](auto& interval, auto& index)
+                                {
+                                    interface[projlevel][index].add_interval(interval);
+                                });
+                        }
+                    }
+                }
+            }
+        }
+
+        CellArray_t interface_ = {interface, false};
+
+        return interface_;
+    }
+
+    /**
+     *
+     */
+    template <size_t dim, class Mesh_t, class Dir_t>
+    static auto cmptInterfaceUniform(Mesh_t& mesh, Mesh_t& omesh, const Dir_t & dir )
+    {
+
+        // dim == 3 is not supported for the moment.
+        assert( dim == 2 );
+
+        using CellList_t  = typename Mesh_t::cl_type;
+        using CellArray_t = typename Mesh_t::ca_type;
+        using mesh_id_t   = typename Mesh_t::mesh_id_t;
+
+        // operation are on leaves only
+        auto& currentMesh = mesh[mesh_id_t::cells];
+        auto& otherMesh   = omesh[mesh_id_t::cells];
+
+        size_t minlevel = otherMesh.min_level();
+        size_t maxlevel = otherMesh.max_level();
+
+        size_t minLevelAtInterface = 99;
+
+        struct MinMax {
+            int min_x = std::numeric_limits<int>::max();
+            int max_x = std::numeric_limits<int>::min();
+            int min_y = std::numeric_limits<int>::max();
+            int max_y = std::numeric_limits<int>::min();
+        };
+
+        boost::mpi::communicator world;
+        std::ofstream logs;
+        logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app );
+
+        logs << fmt::format("\t\t\t> Allocating vector of size : max({}, {})+2 : {}", mesh.max_level(), omesh.max_level(), 
+                            std::max( mesh.max_level(), omesh.max_level() ) + 2 ) << std::endl;
+
+        size_t msize = std::max( mesh.max_level(), omesh.max_level() ) + 2;
+        std::vector< MinMax > mm ( msize );
+
+        for (size_t level = minlevel; level <= maxlevel; ++level)
+        {
+            // for each level we need to check level -1 / 0 / +1
+            std::size_t minlevel_check = static_cast<std::size_t>(
+                std::max(static_cast<int>(currentMesh.min_level()), static_cast<int>(level - 1) ));
+            std::size_t maxlevel_check = std::min(currentMesh.max_level(), level + static_cast<size_t>( 1 ) );
+
+            for (size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel)
+            {
+                // translate neighbour from dir (hopefully to current)
+                auto set       = translate( otherMesh[level], dir );
+                auto intersect = intersection(set, currentMesh[projlevel]).on(projlevel);
+
+                size_t nbInter_ = 0;
+                intersect( [&]( const auto & interval, const auto & index ) {
+                    nbInter_ += 1;
+
+                    mm[ projlevel ].min_x = std::min( interval.start, mm[ projlevel ].min_x );
+                    mm[ projlevel ].max_x = std::max( interval.end, mm[ projlevel ].max_x );
+                    mm[ projlevel ].min_y = std::min( index(0), mm[ projlevel ].min_y );
+                    mm[ projlevel ].max_y = std::max( index(0), mm[ projlevel ].max_y );                        
+                });
+
+                if (nbInter_ > 0){
+                   minLevelAtInterface = std::min( projlevel, minLevelAtInterface );
+                }
+            }
+
+        }
+
+        if( minLevelAtInterface == 99 ) {
+            logs << "\t\t\t> No interface found ... " << std::endl;
+
+            CellList_t tmp;
+            CellArray_t ca_tmp = { tmp, false };
+            return ca_tmp;            
+        }
+
+        logs << fmt::format("\t\t\t> minLevelAtInterface : {}", minLevelAtInterface ) << std::endl;
+        logs << fmt::format( "\t\t\t> Interface min, max : x ({},{});  min, max : y ({},{}) @ level : {}", mm[ minLevelAtInterface ].min_x, 
+                               mm[ minLevelAtInterface ].max_x, mm[ minLevelAtInterface ].min_y, mm[ minLevelAtInterface ].max_y, minLevelAtInterface ) << std::endl;
+        
+        struct MinMax global;
+        global.min_x = mm[ minLevelAtInterface ].min_x; 
+        global.max_x = mm[ minLevelAtInterface ].max_x;
+        global.min_y = mm[ minLevelAtInterface ].min_y;
+        global.max_y = mm[ minLevelAtInterface ].max_y;
+
+        for( size_t level=minLevelAtInterface+1; level<mm.size(); ++level ){
+            size_t diff_level = level - minLevelAtInterface;
+            // logs << fmt::format( "\t> At level {}, diff {}", level, diff_level ) << std::endl;
+            // logs << fmt::format( "\t\t\t> Origin min, max : x ({},{});  min, max : y ({},{})", mm[ level ].min_x, 
+            //                   mm[ level ].max_x, mm[ level ].min_y, mm[ level ].max_y ) << std::endl;
+            // logs << fmt::format( "\t\t\t> Eq minlevel, min, max : x ({},{});  min, max : y ({},{}) ", mm[ level ].min_x >> diff_level, 
+            //                   mm[ level ].max_x >> diff_level, mm[ level ].min_y >> diff_level, mm[ level ].max_y >> diff_level ) << std::endl;
+            
+            global.min_x = std::min( global.min_x, mm[ level ].min_x >> diff_level );
+            global.max_x = std::max( global.max_x, mm[ level ].max_x >> diff_level );
+            global.min_y = std::min( global.min_y, mm[ level ].min_y >> diff_level );
+            global.max_y = std::max( global.max_y, mm[ level ].max_y >> diff_level );
+        }
+
+        CellList_t tmp;
+        
+        // +y, -y : horizontal propagation
+        if( std::abs( dir[0] ) == 0 && std::abs( dir[1] ) == 1 ){
+            logs << "\t> Horizontal propagation ! " << std::endl;
+            if( dir[ 1 ] > 0 ){
+                tmp[ minLevelAtInterface ][ { global.min_y } ].add_interval( { global.min_x, global.max_x } );
+            } else {
+                tmp[ minLevelAtInterface ][ { global.max_y } ].add_interval( { global.min_x, global.max_x } );
+            }
+        }
+
+        // +x, -x : vertical propagation
+        if( std::abs( dir[0] ) == 1 && std::abs( dir[1] ) == 0 ){
+            logs << "\t> Vertical propagation ! " << std::endl;
+            for(int y=global.min_y; y<=global.max_y; ++y ){
+                tmp[ minLevelAtInterface ][ { y } ].add_interval( { global.min_x, global.max_x } );
+            }
+        }
+
+
+        CellArray_t ca_tmp = { tmp, false };
+
+        logs << "Interface for propagation : " << std::endl;
+        logs << ca_tmp << std::endl;
+        logs << std::endl;
+
+        return ca_tmp;
+    }
+
+    /**
+     * Compute cells at the interface between geometricaly adjacent
+     * domains. It relies on neighbourhood mpi_subdomain_t data structure
+     * computed in Mesh_t.
+     *
+     * It returns a vector containing the number of cells (nc) and number of
+     * intervals (ni) for each neighbour (0 to n), in that order.
+     *
+     * Example: nc_0, ni_0, nc_1, ni_1, ...
+     *
+     */
+    // template<int dim, class Mesh_t, class Field_t>
+    // void _computeCartesianInterface( Mesh_t & mesh, Field_t & field ){
+    template <int dim, Direction_t dir, class Mesh_t>
+    static auto _computeCartesianInterface(Mesh_t& mesh)
+    {
+        using CellList_t      = typename Mesh_t::cl_type;
+        using CellArray_t     = typename Mesh_t::ca_type;
+        using mesh_id_t       = typename Mesh_t::mesh_id_t;
+        using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+
+        // give access to geometricaly neighbour process rank and mesh
+        std::vector<mpi_subdomain_t>& neighbourhood = mesh.mpi_neighbourhood();
+        std::size_t n_neighbours                    = neighbourhood.size();
+
+        std::vector<CellList_t> interface(n_neighbours);
+
+        // operation are on leaves only
+        auto currentMesh = mesh[mesh_id_t::cells];
+
+        // looks like using only FACE is better than using DIAG or FACE_AND_DIAG
+        auto dirs = getDirection<dim, dir>();
+
+        for (std::size_t nbi = 0; nbi < n_neighbours; ++nbi)
+        {
+            auto neighbour_mesh = neighbourhood[nbi].mesh[mesh_id_t::cells];
+
+            for (size_t ist = 0; ist < dirs.size(); ++ist)
+            {
+                const auto& stencil = dirs[ist];
+
+                size_t minlevel = neighbour_mesh.min_level();
+                size_t maxlevel = neighbour_mesh.max_level();
+
+                for (size_t level = minlevel; level <= maxlevel; ++level)
+                {
+                    // for each level we need to check level -1 / 0 / +1
+                    std::size_t minlevel_check = static_cast<std::size_t>(
+                        std::max(static_cast<int>(currentMesh.min_level()), static_cast<int>(level) - 1));
+                    std::size_t maxlevel_check = std::min(currentMesh.max_level(), level + static_cast<std::size_t>(1));
+
+                    for (std::size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel)
+                    {
+                        // translate neighbour from dir (hopefully to current) all direction are tested
+                        auto set       = translate(neighbour_mesh[level], stencil);
+                        auto intersect = intersection(set, currentMesh[projlevel]).on(projlevel);
+
+                        size_t nbInter_ = 0, nbCells_ = 0;
+                        intersect(
+                            [&](auto& interval, [[maybe_unused]] auto& index)
+                            {
+                                nbInter_ += 1;
+                                nbCells_ += interval.size();
+                            });
+
+                        // we get more interval / cells, than wanted because neighbour has bigger cells
+                        if (nbInter_ > 0 && projlevel > level)
+                        {
+                            auto set_  = translate(currentMesh[projlevel], stencil);
+                            auto diff_ = difference(intersect, set_);
+
+                            nbInter_ = 0;
+                            nbCells_ = 0;
+                            diff_(
+                                [&](auto& interval, auto& index)
+                                {
+                                    nbInter_ += 1;
+                                    nbCells_ += interval.size();
+                                    // field( projlevel, i, index ) = world.rank() * 10;
+                                    interface[nbi][projlevel][index].add_interval(interval);
+                                });
+                        }
+                        else
+                        {
+                            if (nbInter_ > 0)
+                            {
+                                intersect(
+                                    [&](auto& interval, auto& index)
+                                    {
+                                        interface[nbi][projlevel][index].add_interval(interval);
+                                    });
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        std::vector<CellArray_t> interface_(n_neighbours);
+        for (std::size_t nbi = 0; nbi < n_neighbours; ++nbi)
+        {
+            interface_[nbi] = {interface[nbi], true};
+        }
+
+        return interface_;
+    }
+
+    /**
+     * Compute fluxes of cells between MPI processes. In -fake- MPI environment. To
+     * use it in true MPI juste remove the loop over "irank", and replace irank by myrank;
+     *
+     */
+    void compute_load_balancing_fluxes(std::vector<MPI_Load_Balance>& all)
+    {
+        for (size_t irank = 0; irank < all.size(); ++irank)
+        {
+            // number of cells
+            // supposing each cell has a cost of 1. ( no level dependency )
+            int32_t load = all[irank]._load;
+
+            std::size_t n_neighbours = all[irank].neighbour.size();
+
+            {
+                std::cerr << "[compute_load_balancing_fluxes] Process # " << irank << " load : " << load << std::endl;
+                std::cerr << "[compute_load_balancing_fluxes] Process # " << irank << " nneighbours : " << n_neighbours << std::endl;
+                std::cerr << "[compute_load_balancing_fluxes] Process # " << irank << " neighbours : ";
+                for (size_t in = 0; in < all[irank].neighbour.size(); ++in)
+                {
+                    std::cerr << all[irank].neighbour[in] << ", ";
+                }
+                std::cerr << std::endl;
+            }
+
+            // load of each process (all processes not only neighbour)
+            std::vector<int64_t> loads;
+
+            // data "load" to transfer to neighbour processes
+            all[irank].fluxes.resize(n_neighbours);
+            std::fill(all[irank].fluxes.begin(), all[irank].fluxes.end(), 0);
+
+            const std::size_t n_iterations = 1;
+
+            for (std::size_t k = 0; k < n_iterations; ++k)
+            {
+                // numbers of neighboors processes for each neighbour process
+                std::vector<std::size_t> nb_neighbours;
+
+                if (irank == 0)
+                {
+                    std::cerr << "[compute_load_balancing_fluxes] Fluxes iteration # " << k << std::endl;
+                }
+
+                // // get info from processes
+                // mpi::all_gather(world, load, loads);
+                // mpi::all_gather(world, m_mpi_neighbourhood.size(), nb_neighbours);
+
+                // load of current process
+                int32_t load_np1 = load;
+
+                // compute updated load for current process based on its neighbourhood
+                for (std::size_t j_rank = 0; j_rank < n_neighbours; ++j_rank)
+                {
+                    auto neighbour_rank = static_cast<std::size_t>(all[irank].neighbour[j_rank]);
+                    auto neighbour_load = all[irank].load[j_rank];
+                    auto diff_load      = neighbour_load - load;
+
+                    std::size_t nb_neighbours_neighbour = all[neighbour_rank].neighbour.size();
+
+                    double weight = 1. / static_cast<double>(std::max(n_neighbours, nb_neighbours_neighbour) + 1);
+
+                    int32_t transfertLoad = static_cast<int32_t>(std::lround(weight * static_cast<double>(diff_load)));
+
+                    all[irank].fluxes[j_rank] += transfertLoad;
+
+                    load_np1 += transfertLoad;
+                }
+
+                // do check on load & fluxes ?
+
+                {
+                    std::cerr << "fluxes : ";
+                    for (size_t in = 0; in < n_neighbours; ++in)
+                    {
+                        std::cerr << all[irank].fluxes[in] << ", ";
+                    }
+                    std::cerr << std::endl;
+                }
+
+                // load_transfer( load_fluxes );
+
+                load = load_np1;
+            }
+        }
+    }
+
+    /**
+     * Check if there is an intersection between two meshes. We move one mesh into the direction
+     * of the other based on "dir" stencils (template parameter). Here, we rely on the 2:1 balance.
+     * By default, stencils are "1" based, which means that we move the mesh by one unit element. This
+     * could be changed if necessary by multiplying the stencils by an integer value.
+     */
+    template <Direction_t dir, class Mesh_t>
+    bool intersectionExists(Mesh_t& meshA, Mesh_t& meshB)
+    {
+        using mesh_id_t = typename Mesh_t::mesh_id_t;
+        
+        constexpr size_t dim = Mesh_t::dim;
+
+        // operation are on leaves cells only
+        auto & meshA_ = meshA[mesh_id_t::cells];
+        auto & meshB_ = meshB[mesh_id_t::cells];
+
+        // get stencils for direction
+        auto dirs = getDirection<dim, dir>();
+
+        for (size_t ist = 0; ist < dirs.size(); ++ist)
+        {
+            const auto& stencil = dirs[ist];
+
+            size_t minlevel = meshB_.min_level();
+            size_t maxlevel = meshB_.max_level();
+
+            for (size_t level = minlevel; level <= maxlevel; ++level)
+            {
+                // for each level we need to check level -1 / 0 / +1
+                std::size_t minlevel_check = static_cast<std::size_t>(
+                    std::max(static_cast<int>(meshA_.min_level()), static_cast<int>(level) - 1));
+                std::size_t maxlevel_check = std::min(meshA_.max_level(), level + static_cast<std::size_t>(1));
+
+                for (std::size_t projlevel = minlevel_check; projlevel <= maxlevel_check; ++projlevel)
+                {
+                    // translate meshB_ in "dir" direction
+                    auto set       = translate(meshB_[level], stencil);
+                    auto intersect = intersection(set, meshA_[projlevel]).on(projlevel);
+
+                    size_t nbInter_ = 0;
+                    intersect(
+                        [&]([[maybe_unused]] auto& interval, [[maybe_unused]] auto& index)
+                        {
+                            nbInter_ += 1;
+                        });
+
+                    if (nbInter_ > 0)
+                    {
+                        return true;
+                    }
+                }
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Discover new neighbour connection that might arise during load balancing
+     */
+    template <class Mesh_t>
+    void discover_neighbour(Mesh_t& mesh)
+    {
+        using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+
+        boost::mpi::communicator world;
+
+        // DEBUG
+        std::ofstream logs;
+        logs.open("log_" + std::to_string(world.rank()) + ".dat", std::ofstream::app);
+        logs << "\n# [LoadBalancer] discover_neighbour" << std::endl;
+
+        // give access to geometricaly neighbour process rank and mesh
+        std::vector<mpi_subdomain_t>& neighbourhood = mesh.mpi_neighbourhood();
+
+        // bool requireGeneralUpdate = false;
+
+        std::vector<bool> keepNeighbour(neighbourhood.size(), true);
+
+        std::vector<std::vector<int>> neighbourConnection(neighbourhood.size());
+
+        // for each neighbour we check if there is an intersection with another one.
+        for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi)
+        {
+            // check current - neighbour connection
+            keepNeighbour[nbi] = intersectionExists<Direction_t::FACE_AND_DIAG>(mesh, neighbourhood[nbi].mesh);
+
+            // require update if a connection is lost
+            // requireGeneralUpdate = requireGeneralUpdate || ( ! keepNeighbour[ nbi ] );
+
+            if (!keepNeighbour[nbi])
+            {
+                logs << fmt::format("\t> Loosing neighbour connection with {}", neighbourhood[nbi].rank) << std::endl;
+            }
+
+            // check neighbour - neighbour connection
+            for (size_t nbj = nbi + 1; nbj < neighbourhood.size(); ++nbj)
+            {
+                logs << fmt::format("\t> Checking neighbourhood connection {} <-> {}", neighbourhood[nbi].rank, neighbourhood[nbj].rank)
+                     << std::endl;
+                bool connected = intersectionExists<Direction_t::FACE_AND_DIAG>(neighbourhood[nbi].mesh, neighbourhood[nbj].mesh);
+
+                if (connected)
+                {
+                    neighbourConnection[nbi].emplace_back(neighbourhood[nbj].rank);
+                    neighbourConnection[nbj].emplace_back(neighbourhood[nbi].rank);
+                }
+            }
+        }
+
+        // communicate to neighbours the list of connection they have
+        for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi)
+        {
+            // debug
+            logs << "\t> Sending computed neighbour: {";
+            for (const auto& i : neighbourConnection[nbi])
+            {
+                logs << i << ", ";
+            }
+            logs << "} to process " << neighbourhood[nbi].rank << std::endl;
+            // end debug
+
+            world.send(neighbourhood[nbi].rank, 28, neighbourConnection[nbi]);
+        }
+
+        // map of current MPI neighbours processes rank
+        std::map<int, bool> _tmp; // key = rank, value = required
+        for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi)
+        {
+            if (keepNeighbour[nbi])
+            {
+                _tmp.emplace(std::make_pair(neighbourhood[nbi].rank, true));
+            }
+        }
+
+        logs << "\t> Receiving computed neighbour connection list from neighbour processes " << std::endl;
+        for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi)
+        {
+            std::vector<int> _rcv_neighbour_list;
+            world.recv(neighbourhood[nbi].rank, 28, _rcv_neighbour_list);
+
+            for (size_t in = 0; in < _rcv_neighbour_list.size(); ++in)
+            {
+                if (_tmp.find(_rcv_neighbour_list[in]) == _tmp.end())
+                {
+                    logs << "\t\t> New neighbour detected : " << _rcv_neighbour_list[in] << std::endl;
+
+                    _tmp[_rcv_neighbour_list[in]] = true;
+                    // requireGeneralUpdate = requireGeneralUpdate || true;
+                }
+            }
+        }
+
+        world.barrier();
+
+        // update neighbours ranks
+        neighbourhood.clear();
+        for (const auto& ni : _tmp)
+        {
+            neighbourhood.emplace_back(ni.first);
+        }
+
+        // debug
+        logs << "\t> New neighbourhood : {";
+        for (size_t nbi = 0; nbi < neighbourhood.size(); ++nbi)
+        {
+            logs << neighbourhood[nbi].rank << ", ";
+        }
+        logs << "}" << std::endl;
+        // end debug
+
+        // gather neighbour mesh
+        mesh.update_mesh_neighbour();
+
+    }
+
+} // namespace samurai
+#endif
+
+
+/**
+ * This function perform a "fake" load-balancing by updating an integer scalar field containing the rank
+ * of the cell after the load balancing; This is done using MORTON SFC.
+ *
+ * This does not require to have the graph.
+ */
+// template<int dim, class SFC_t, class Mesh, class Field_t>
+// void perform_load_balancing_SFC( Mesh & mesh, int ndomains, Field_t & fake_mpi_rank ) {
+
+//     using Config = samurai::MRConfig<dim>;
+//     using Mesh_t    = samurai::MRMesh<Config>;
+//     using cell_t    = typename Mesh::cell_t;
+
+//     SFC<SFC_t> sfc;
+
+//     // SFC key (used for implicit sorting through map mechanism)
+//     std::map<SFC_key_t, cell_t> sfc_map;
+
+//     int sfc_max_level = mesh.max_level(); // for now but remember must <= 21 for Morton
+
+//     SFC_key_t min=std::numeric_limits<SFC_key_t>::max(), max=std::numeric_limits<SFC_key_t>::min();
+
+//     samurai::for_each_cell( mesh, [&]( const auto & cell ){
+
+//         // ij coordinate of cell
+//         double dxmax = samurai::cell_length( sfc_max_level );
+
+//         auto tmp = cell.center() / dxmax;
+
+//         xt::xtensor_fixed<uint32_t, xt::xshape<dim>> ij = { static_cast<uint32_t>( tmp( 0 ) ),
+//                                                             static_cast<uint32_t>( tmp( 1 ) ) };
+
+//         auto key = sfc.template getKey<dim>( ij );
+//         // std::cerr << "\t> Coord (" << ij( 0 ) << ", " << ij( 1 ) << ") ----> " << key << std::endl;
+
+//         sfc_map[ key ] = cell ;
+
+//     });
+
+//     size_t nbcells_tot   = mesh.nb_cells( Mesh::mesh_id_t::cells ); //load -balancing on leaves
+//     size_t ncellsPerProc = std::floor( nbcells_tot / ndomains );
+
+//     // std::cerr << "\n\t> Morton index [" << min << ", " << max << "]" << std::endl;
+//     std::cerr << "\t> Total number of cells : " << nbcells_tot << std::endl;
+//     std::cout << "\t> Perfect load-balancing (weight-cell = 1.) : " << static_cast<size_t>( nbcells_tot / ndomains )
+//               << " / MPI" << std::endl;
+
+//     size_t cindex = 0;
+//     for(const auto & item : sfc_map ) {
+//         auto sfc_key = item.first;
+
+//         int fake_rank = std::floor( cindex / ncellsPerProc );
+//         if ( fake_rank >= ndomains ) fake_rank = ndomains - 1;
+
+//         fake_mpi_rank[ item.second ] = fake_rank;
+//         cindex ++;
+//     }
+
+// }
+
+// enum Interval_CellPosition { FIRST, LAST };
+
+// template<int dim, class Mesh, class Field_t>
+// void perform_load_balancing_SFC_Interval( Mesh & mesh, int ndomains, Field_t & fake_mpi_rank, const Interval_CellPosition &icp ) {
+
+//     using inter_t = samurai::Interval<int, long long>;
+
+//     struct Data {
+//         size_t level;
+//         inter_t interval;
+//         xt::xtensor_fixed<samurai::default_config::value_t, xt::xshape<dim - 1>> indices;
+//     };
+
+//     // SFC key (used for implicit sorting through map mechanism)
+//     std::map<SFC_key_t, Data> sfc_map;
+
+//     SFC<Morton> sfc;
+//     int sfc_max_level = mesh.max_level(); // for now but remember must <= 21 for Morton
+
+//     SFC_key_t min=std::numeric_limits<SFC_key_t>::max(), max=std::numeric_limits<SFC_key_t>::min();
+
+//     size_t ninterval = 0;
+//     samurai::for_each_interval( mesh, [&]( std::size_t level, const auto& inter, const auto& index ){
+
+//         // get Logical coordinate or first cell
+//         xt::xtensor_fixed<int, xt::xshape<dim>> icell;
+
+//         icp == Interval_CellPosition::LAST ? icell( 0 ) = inter.end - 1 : icell( 0 ) = inter.start;
+
+//         for(int idim=0; idim<dim-1; ++idim){
+//             icell( idim + 1 ) = index( idim );
+//         }
+
+//         // convert logical coordinate to max level logical coordinates
+//         for( int idim=0; idim<dim; ++idim ){
+//             icell( idim ) = icell( idim ) << ( mesh.max_level() - level );
+//         }
+
+//         xt::xtensor_fixed<uint32_t, xt::xshape<dim>> ijk;
+
+//         if constexpr ( dim == 2 ) ijk = { static_cast<uint32_t>( icell( 0 ) ),
+//                                           static_cast<uint32_t>( icell( 1 ) ) };
+
+//         if constexpr ( dim == 3 ) ijk = { static_cast<uint32_t>( icell( 0 ) ),
+//                                           static_cast<uint32_t>( icell( 1 ) ),
+//                                           static_cast<uint32_t>( icell( 2 ) ) };
+
+//         sfc_map[ sfc.getKey<dim>( ijk ) ] = { level, inter, index };
+
+//         ninterval ++;
+//     });
+
+//     size_t ninterPerProc = std::floor( ninterval / ndomains );
+
+//     // std::cerr << "\n\t> Morton index [" << min << ", " << max << "]" << std::endl;
+//     std::cerr << "\t> Total number of interval : " << ninterval << std::endl;
+//     std::cout << "\t> Perfect load-balancing (weight-interval = 1.) : " << static_cast<size_t>( ninterPerProc )
+//               << " / MPI" << std::endl;
+
+//     size_t cindex = 0;
+//     for(const auto & item : sfc_map ) {
+//         int fake_rank = std::floor( cindex / ninterPerProc );
+//         if ( fake_rank >= ndomains ) fake_rank = ndomains - 1;
+
+//         fake_mpi_rank( item.second.level, item.second.interval, item.second.indices ) = fake_rank;
+
+//         cindex ++;
+//     }
+
+// }
+
+// /**
+//  *
+//  * Global contains the global mesh. Since this is a toy function, it contains the union of all mesh.
+//  * meshes contains the mesh of each MPI process ( or let say the mesh of neighbour processes ...)
+//  *
+// */
+// template<int dim, class AMesh_t, class Field_t>
+// void perform_load_balancing_diffusion( AMesh_t & global, std::vector<AMesh_t> & meshes, int ndomains,
+//                                        const std::vector<samurai::MPI_Load_Balance> & all, Field_t & fake_mpi_rank ) {
+
+//     using CellList_t = typename AMesh_t::cl_type;
+
+//     struct Coord_t { xt::xtensor_fixed<double, xt::xshape<dim>> coord; };
+
+//     std::vector<Coord_t> barycenters ( ndomains );
+
+//     { // compute barycenter of current domains ( here, all domains since with simulate multiple MPI domains)
+
+//         int maxlevel = 4;
+//         for( size_t m_=0; m_ < meshes.size(); ++m_ ) {
+
+//             double wght_tot = 0.;
+//             samurai::for_each_cell( meshes[ m_ ], [&]( const auto & cell ) {
+
+//                 // [OPTIMIZATION] precompute weight as array
+//                 double wght = 1. / ( 1 << ( maxlevel - cell.level ) );
+
+//                 const auto cc = cell.center();
+
+//                 barycenters[ m_ ].coord( 0 ) += cc( 0 ) * wght;
+//                 barycenters[ m_ ].coord( 1 ) += cc( 1 ) * wght;
+//                 if constexpr ( dim == 3 ) { barycenters[ m_ ].coord( 2 ) += cc( 2 ) * wght; }
+
+//                 wght_tot += wght;
+
+//             });
+
+//             barycenters[ m_ ].coord( 0 ) /= wght_tot;
+//             barycenters[ m_ ].coord( 1 ) /= wght_tot;
+//             if constexpr ( dim == 3 ) barycenters[ m_ ].coord( 2 ) /= wght_tot;
+
+//             std::cerr << "\t> Domain # " << m_ << ", bc : {" << barycenters[ m_ ].coord( 0 ) << ", "
+//                       << barycenters[ m_ ].coord( 1 ) << "}" << std::endl;
+
+//         }
+
+//     }
+
+//     std::vector<CellList_t> new_meshes( ndomains );
+//     std::vector<CellList_t> exchanged( ndomains );
+
+//     for( std::size_t m_ = 0; m_ < meshes.size(); ++m_ ){ // over each domains
+
+//         std::cerr << "\t> Working on domains # " << m_ << std::endl;
+
+//         // auto dist = samurai::make_field<int, 1>( "rank", meshes[ m_ ] );
+
+//         // id des neighbours dans le tableau de la structure MPI_Load_Balance
+//         // attention différent du rank mpi !
+//         std::vector<std::size_t> id_send;
+
+//         int n_neighbours = static_cast<int>( all[ m_ ].neighbour.size() );
+//         for(std::size_t nbi = 0; nbi<n_neighbours; ++nbi ){
+
+//             auto nbi_rank         = all[ m_ ].neighbour[ nbi ];
+//             int nbCellsToTransfer = all[ m_ ].fluxes[ nbi ];
+
+//             std::cerr << "\t\t> Neighbour rank : " << nbi_rank << std::endl;
+//             std::cerr << "\t\t> Neighbour flux : " << all[ m_ ].fluxes[ nbi ] << std::endl;
+
+//             if( nbCellsToTransfer < 0 ){
+
+//                 id_send.emplace_back( nbi );
+
+//                 // Logical_coord_t stencil;
+//                 // { // Compute the stencil or normalized direction to neighbour
+//                 //     Coord_t tmp;
+//                 //     double n2 = 0;
+//                 //     for(int idim = 0; idim<dim; ++idim ){
+//                 //         tmp.coord[ idim ] = barycenters[ nbi_rank ].coord( idim )- barycenters[ m_ ].coord( idim );
+//                 //         n2 += tmp.coord( idim ) * tmp.coord( idim );
+//                 //     }
+
+//                 //     n2 = std::sqrt( n2 );
+
+//                 //     for(int idim = 0; idim<dim; ++idim ){
+//                 //         tmp.coord( idim ) /= n2;
+//                 //         stencil.coord( idim ) = static_cast<int>( tmp.coord( idim ) / 0.5 );
+//                 //     }
+
+//                 //     std::cerr << "\t\t> stencil for neighbour # " << nbi_rank << " : ";
+//                 //     for(size_t idim=0; idim<dim; ++idim ){
+//                 //         std::cerr << stencil.coord( idim ) << ",";
+//                 //     }
+//                 //     std::cerr << std::endl;
+//                 // }
+
+//             }
+//         }
+
+//         std::cerr << "\t\t> Number RECV : " << id_send.size() << std::endl;
+
+//         std::vector<int> already_given( n_neighbours, 0 );
+
+//         for_each_interval( meshes[ m_ ], [&]( std::size_t level, const auto& interval, const auto& index ){
+//             Coord_t ibar;
+
+//             std::cerr << "\t\t> Interval [" << interval.start << ", " << interval.end << "[" << std::endl;
+
+//             double dm = 1. / (1 << level );
+//             ibar.coord( 0 ) = ( (interval.end - interval.start) * 0.5 + interval.start ) * dm ;
+//             ibar.coord( 1 ) = ( index( 1 ) * dm ) + dm * 0.5;
+//             if constexpr ( dim == 3 ) ibar( 2 ) = ( index( 2 ) * dm ) + dm * 0.5;
+
+//             std::cerr << "\t\t\t> level " << level << " center @ {" << ibar.coord(0) << ", " << ibar.coord(1) << "}" << std::endl;
+
+//             // find which neighbour will potentially receive this interval
+//             int winner_id      = -1; // keep it to current if still negative
+//             double winner_dist = std::numeric_limits<double>::max();
+//             for( int nbi=0; nbi<id_send.size(); ++ nbi ){
+
+//                 auto neighbour_rank = all[ m_ ].neighbour[ id_send[ nbi ] ];
+
+//                 double dist = 0.0;
+//                 for( int idim=0; idim < dim; ++idim){
+//                     double d = barycenters[ m_ ].coord( idim ) - barycenters[ neighbour_rank ].coord( idim );
+//                     dist += d * d;
+//                 }
+//                 dist = std::sqrt( dist );
+
+//                 std::cerr << "\t\t\t> Dist to neighbour #" << neighbour_rank << " : " << dist << " vs " << winner_dist << std::endl;
+//                 std::cerr << "\t\t\t> Already given to this neighbour " << already_given[ neighbour_rank ] << std::endl;
+//                 std::cerr << "\t\t\t> NbCells of this interval " << interval.size() << std::endl;
+//                 std::cerr << "\t\t\t> fluxes for this neighbour : " << all[ m_ ].fluxes[ id_send[ nbi ] ] << std::endl;
+
+//                 if( dist < winner_dist &&
+//                     already_given[ neighbour_rank ] + interval.size() <= (- all[ m_ ].fluxes[ id_send[ nbi ] ]) ){
+
+//                     winner_id   = id_send[ nbi ];
+//                     winner_dist = dist;
+//                 }
+
+//             }
+
+//             if( winner_id >= 0 ){
+//                 auto neighbour_rank = all[ m_ ].neighbour[ winner_id ];
+//                 std::cerr << "\t> Interval given to process " << neighbour_rank << " + ncells : " << interval.size() << std::endl;
+//                 exchanged[ neighbour_rank ][ level ][ index ].add_interval( interval );
+//                 already_given[ neighbour_rank ] += interval.size();
+//             }else{
+//                 new_meshes[ m_ ][ level ][ index ].add_interval( interval );
+//             }
+
+//         });
+
+//         meshes[ m_ ] = { new_meshes[ m_ ], true };
+
+//     }
+
+//     for( std::size_t m_ = 0; m_ < meshes.size(); ++m_ ){ // over each domains
+//         for( int level=global.min_level(); level<=global.max_level(); ++level ) {
+//             auto intersect = intersection( global[ level ], meshes[ m_ ][ level ]);
+
+//             intersect([&]( auto & i, auto & index ) {
+//                 fake_mpi_rank( level, i, index ) = static_cast<int>( m_ );
+//             });
+//         }
+
+//         AMesh_t tmp = { exchanged[ m_], true };
+//         for( int level=global.min_level(); level<=global.max_level(); ++level ) {
+//             auto intersect = intersection( global[ level ], tmp[ level ]);
+
+//             intersect([&]( auto & i, auto & index ) {
+//                 fake_mpi_rank( level, i, index ) = static_cast<int>( m_ );
+//             });
+//         }
+//     }
+
+// }
diff --git a/include/samurai/load_balancing_diffusion.hpp b/include/samurai/load_balancing_diffusion.hpp
new file mode 100644
index 000000000..29947c322
--- /dev/null
+++ b/include/samurai/load_balancing_diffusion.hpp
@@ -0,0 +1,398 @@
+#pragma once
+
+#include <map>
+#include "load_balancing.hpp"
+#include "field.hpp"
+
+// for std::sort
+#include <algorithm>
+
+#ifdef SAMURAI_WITH_MPI
+namespace Load_balancing{
+
+    class Diffusion : public samurai::LoadBalancer<Diffusion> {
+
+        private:
+
+            int _ndomains;
+            int _rank;
+
+            const double _unbalance_threshold = 0.05; // 5 %
+
+        public:
+
+            Diffusion() {
+
+    #ifdef SAMURAI_WITH_MPI
+                boost::mpi::communicator world;
+                _ndomains = world.size();
+                _rank     = world.rank();
+    #else
+                _ndomains = 1;
+                _rank     = 0;
+    #endif
+            }
+
+            inline std::string getName() const { return "diffusion"; }
+
+            template <class Mesh_t>
+            bool require_balance_impl( Mesh_t & mesh ) {
+
+                boost::mpi::communicator world;
+
+                logs << fmt::format("\n# [Diffusion_LoadBalancer] required_balance_impl ") << std::endl;
+
+                double nbCells_tot = 0;
+                std::vector<double> nbCellsPerProc;
+                boost::mpi::all_gather( world, static_cast<double>( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) ), nbCellsPerProc );
+
+                for(size_t ip=0; ip<nbCellsPerProc.size(); ++ip ) {
+                    nbCells_tot += nbCellsPerProc[ ip ];
+                }
+
+                // no weight while computing load 
+                double dc = nbCells_tot  / static_cast<double> ( world.size() );
+
+                for(size_t ip=0; ip<nbCellsPerProc.size(); ++ip ) {
+                    double diff = std::abs( nbCellsPerProc[ ip ] - dc ) / dc;
+
+                    if( diff > _unbalance_threshold ) return true;
+                }
+
+                return false;
+            }
+
+            template<class Mesh_t>
+            auto reordering_impl( Mesh_t & mesh ) {
+                auto flags = samurai::make_field<int, 1>("diffusion_flag", mesh);
+                flags.fill( _rank );
+
+                return flags;
+            }
+
+            template<size_t dim, typename Stencil_t, typename Coord_t>
+            std::vector<Stencil_t> getStencilToNeighbour( const Coord_t & bc_current, const Coord_t & bc_neigh ) const {
+
+                Stencil_t dir_from_neighbour;
+                std::vector<Stencil_t> stencils;
+
+                Coord_t tmp;
+                double n2 = 0.;
+                for( size_t idim = 0; idim<dim; ++idim ){
+                    tmp( idim ) = bc_current( idim ) - bc_neigh( idim );
+                    n2 += tmp( idim ) * tmp( idim );
+                }
+
+                n2 = std::sqrt( n2 );
+
+                for( size_t idim = 0; idim<dim; ++idim ){
+                    tmp( idim ) /= n2;
+                    dir_from_neighbour( idim ) = static_cast<int>( tmp( idim ) / 0.5 );
+
+                    // FIXME why needed ? 
+                    if( std::abs( dir_from_neighbour( idim ) ) > 1 ) {
+                        dir_from_neighbour( idim ) < 0 ? dir_from_neighbour( idim ) = -1 : dir_from_neighbour( idim ) = 1;
+                    }
+
+                    if( dir_from_neighbour( idim ) != 0 ) {
+                        Stencil_t dd;
+                        dd.fill(0);
+                        dd(idim) = dir_from_neighbour(idim);
+                        stencils.emplace_back( dd );
+                    }
+                }
+                
+                return stencils;
+            }
+
+            template<class Mesh_t>
+            auto load_balance_impl( Mesh_t & mesh ){
+
+                using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+                using CellList_t      = typename Mesh_t::cl_type;
+                using mesh_id_t       = typename Mesh_t::mesh_id_t;
+
+                using Coord_t = xt::xtensor_fixed<double, xt::xshape<Mesh_t::dim>>;
+                using Stencil = xt::xtensor_fixed<int, xt::xshape<Mesh_t::dim>>;
+
+                boost::mpi::communicator world;
+
+                // For debug
+                // std::ofstream logs; 
+                // logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app );
+                logs << fmt::format("> New load-balancing using {} ", getName() ) << std::endl;
+                
+                std::vector<mpi_subdomain_t> & neighbourhood = mesh.mpi_neighbourhood();
+
+                /*
+                * Correction of the list of neighbour process to take into account into the graph when computing
+                * fluxes that this load-balancing strategy does not exchange in diagonal. Might no longer be necessary
+                * if stencil are splitted by direction.
+                */
+                // std::vector<int> forceNeighbour;
+                // {
+                //     std::vector<mpi_subdomain_t> & neighbourhood_tmp = mesh.mpi_neighbourhood();
+
+                //     for( auto & neighbour : neighbourhood_tmp ){
+                //         auto interface = samurai::cmptInterface<Mesh_t::dim, samurai::Direction_t::FACE>( mesh, neighbour.mesh );
+                //         size_t nintervals = 0;
+                //         for_each_interval(interface, [&]( [[maybe_unused]] size_t level, [[maybe_unused]] const auto & i, [[maybe_unused]] const auto & ii ){
+                //             nintervals ++;
+                //         });
+                //         if( nintervals > 0 ){
+                //             forceNeighbour.emplace_back( neighbour.rank );
+                //             neighbourhood.emplace_back( neighbour );
+                //         }
+                //     }
+
+                //     logs << "Corrected neighbours : ";
+                //     for(const auto & fn : forceNeighbour ) 
+                //         logs << fn << ", ";
+                //     logs << std::endl;
+                // }
+
+                size_t n_neighbours = neighbourhood.size();
+
+                // compute fluxes in terms of number of intervals to transfer/receive
+                // by default, perform 5 iterations
+                // std::vector<int> fluxes = samurai::cmptFluxes<samurai::BalanceElement_t::CELL>( mesh, forceNeighbour, 5 );
+                std::vector<int> fluxes = samurai::cmptFluxes<samurai::BalanceElement_t::CELL>( mesh, 5 );
+
+                std::vector<int> new_fluxes( fluxes );
+
+                // get loads from everyone
+                std::vector<int> loads;
+                int my_load = static_cast<int>( samurai::cmptLoad<samurai::BalanceElement_t::CELL>( mesh ) );
+                boost::mpi::all_gather( world, my_load, loads );
+
+                { // some debug info
+                    logs << "load : " << my_load << std::endl;
+                    logs << "nneighbours : " << n_neighbours << std::endl;
+                    logs << "neighbours : ";
+                    for( size_t in=0; in<neighbourhood.size(); ++in )
+                        logs << neighbourhood[ in ].rank << ", ";
+                    logs << std::endl << "fluxes : ";
+                    for( size_t in=0; in<neighbourhood.size(); ++in )
+                        logs << fluxes[ in ] << ", ";
+                    logs << std::endl;
+                }
+
+                std::vector<CellList_t> cl_to_send( n_neighbours );
+
+                // set field "flags" for each rank. Initialized to current for all cells (leaves only)
+                auto flags = samurai::make_field<int, 1>("diffusion_flag", mesh);
+                flags.fill( world.rank() );
+
+                // load balancing order
+                std::vector<size_t> order( n_neighbours );
+                {
+                    for( size_t i=0; i<order.size(); ++i ){ order[ i ] = i; }
+
+                    // order neighbour to echange data with, based on load
+                    std::sort( order.begin(), order.end(), [&fluxes]( size_t i, size_t j){
+                        return fluxes[ i ] < fluxes[ j ] ;
+                    });
+
+                }
+
+                for( size_t neigh_i=0; neigh_i<n_neighbours; ++neigh_i ){
+
+                    // neighbour [0, n_neighbours[
+                    auto neighbour_local_id = order[ neigh_i ];
+
+                    // all cells have been given, neighbours that might left are "givers" (remember the fluxes were sorted)
+                    if( fluxes[ neighbour_local_id ] >= 0 ) break;
+
+                    logs << fmt::format("\t> Working on neighbour # {}", neighbourhood[ neighbour_local_id ].rank ) << std::endl;
+
+                    // move the interface in the direction of "the center of mass" of the domain
+                    // we basically want to move based on the normalized cartesian axis
+                    //
+                    // Q?: take into account already given intervals to compute BC ?
+                    // (no weight here) 
+                    Coord_t bc_current   = samurai::_cmpCellBarycenter<Mesh_t::dim>( mesh[ mesh_id_t::cells ] );
+                    Coord_t bc_neighbour = samurai::_cmpCellBarycenter<Mesh_t::dim>( neighbourhood[ neighbour_local_id ].mesh[ mesh_id_t::cells ] );
+
+                    // Compute normalized direction to neighbour
+                    // Stencil dir_from_neighbour;
+                    std::vector<Stencil> dirs = getStencilToNeighbour<Mesh_t::dim, Stencil>( bc_current, bc_neighbour ); 
+                    // {
+                    //     auto aa_ = getStencilToNeighbour<Mesh_t::dim, Stencil>( bc_current, bc_neighbour );
+
+                    //     logs << fmt::format("\t> Number of stencils: {}", aa_.size()) << std::endl;
+                    //     for( const auto & st : aa_ ) {
+                    //         logs << fmt::format("\t\t> Stencil : ({},{})", st(0), st(1) ) << std::endl;
+                    //     }
+
+                    //     Coord_t tmp;
+                    //     double n2 = 0.;
+                    //     for( size_t idim = 0; idim<Mesh_t::dim; ++idim ){
+                    //         tmp( idim ) = bc_current( idim ) - bc_neighbour( idim );
+                    //         n2 += tmp( idim ) * tmp( idim );
+                    //     }
+
+                    //     n2 = std::sqrt( n2 );
+
+                    //     for( size_t idim = 0; idim<Mesh_t::dim; ++idim ){
+                    //         tmp( idim ) /= n2;
+                    //         dir_from_neighbour( idim ) = static_cast<int>( tmp( idim ) / 0.5 );
+
+                    //         // FIXME why needed ? 
+                    //         if( std::abs( dir_from_neighbour( idim ) ) > 1 ) {
+                    //             dir_from_neighbour( idim ) < 0 ? dir_from_neighbour( idim ) = -1 : dir_from_neighbour( idim ) = 1;
+                    //         }
+                    //     }
+                        
+                    //     // Avoid diagonals exchange, and emphaze x-axis. Maybe two phases propagation in case of diagonal ?
+                    //     // i.e.: if (1, 1) -> (1, 0) then (0, 1) ?
+
+                    //     if constexpr ( Mesh_t::dim == 2 ) { 
+                    //         if( std::abs(dir_from_neighbour[0]) == 1 && std::abs( dir_from_neighbour[1]) == 1 ){
+                    //             // dir_from_neighbour[0] = 1;
+                    //             dir_from_neighbour[1] = 0;
+                    //         }
+                    //     }
+
+                    //     if constexpr ( Mesh_t::dim == 3 ) { 
+                    //         if( std::abs(dir_from_neighbour[0]) == 1 && std::abs( dir_from_neighbour[1]) == 1 && std::abs(dir_from_neighbour[2]) == 1){
+                    //             dir_from_neighbour[1] = 0;
+                    //             dir_from_neighbour[2] = 0;
+                    //         }
+                    //     }
+
+                    //     logs << fmt::format("\t\t> (corrected) stencil for this neighbour # {} :", neighbourhood[ neighbour_local_id ].rank);
+                    //     for(size_t idim=0; idim<Mesh_t::dim; ++idim ){
+                    //         logs << dir_from_neighbour( idim ) << ",";
+                    //     }
+                    //     logs << std::endl;
+
+                    // }
+
+                    for(const auto & dir_from_neighbour : dirs ){
+                        // direction from neighbour domain to current domain
+                        auto interface = samurai::cmptInterfaceUniform<Mesh_t::dim>( mesh, neighbourhood[ neighbour_local_id ].mesh, dir_from_neighbour );
+
+                        bool empty = false;
+                        {
+                            size_t iii = 0;
+                            samurai::for_each_interval( interface, [&]( [[maybe_unused]] size_t level, [[maybe_unused]] const auto & i, [[maybe_unused]] const auto & ii ){
+                                iii ++;
+                            });
+                            if( iii == 0 ) empty = true;
+                        }
+
+                        if( empty ) {
+                            logs << "\t> Skipping neighbour, empty interface ! " << std::endl;
+                            continue;
+                        }
+
+                        {
+                            int nCellsAtInterfaceGiven = 0, nCellsAtInterface = 0;
+                            for (size_t level = mesh.min_level(); level <= mesh.max_level(); ++level) {
+                                
+                                auto intersect = samurai::intersection( interface[ interface.min_level() ], mesh[ mesh_id_t::cells ][ level ] ).on( level ); // need handle level difference here !
+                                intersect( [&]( [[maybe_unused]] const auto & interval, [[maybe_unused]] const auto & index ){
+                                    for(size_t ii=0; ii<interval.size(); ++ii){
+                                        if( flags( level, interval, index )[ ii ] == world.rank() )
+                                        {
+                                            flags( level, interval, index )[ ii ] = neighbourhood[ neighbour_local_id ].rank;
+                                            nCellsAtInterfaceGiven += 1;
+                                        }
+                                        nCellsAtInterface += 1;
+                                    }
+
+                                });
+
+                            }
+
+                            new_fluxes[ neighbour_local_id ] += nCellsAtInterfaceGiven;
+
+                            logs << fmt::format("\t\t> NCellsAtInterface : {}, NCellsAtInterfaceGiven : {}", nCellsAtInterface, nCellsAtInterfaceGiven ) << std::endl;
+                        }
+                    
+
+                        // propagate until full-fill neighbour
+                        {
+                            size_t nbElementGiven = 1; // validate the while condition on starter
+                        
+                            logs << fmt::format("\t\t\t> Propagate for neighbour rank # {}", neighbourhood[ neighbour_local_id ].rank) << std::endl;
+
+                            int offset = 1;
+                            while( new_fluxes[ neighbour_local_id ] < 0 && nbElementGiven > 0 ){
+
+                                // intersection of interface with current mesh
+                                size_t minLevelInInterface = mesh.max_level();
+
+                                {
+                                    auto interface_on_mesh = samurai::translate( interface[ interface.min_level() ], dir_from_neighbour * offset ); // interface is monolevel !
+                                    for (size_t level = mesh.min_level(); level <= mesh.max_level(); ++level) {
+                                        size_t nIntervalAtInterface = 0;
+                                        auto intersect = samurai::intersection( interface_on_mesh, mesh[ mesh_id_t::cells ][ level ] ).on( level ); // need handle level difference here !
+                                        intersect( [&]( [[maybe_unused]] const auto & interval, [[maybe_unused]] const auto & index ){
+                                            nIntervalAtInterface += 1;
+                                        });
+
+                                        if( nIntervalAtInterface > 0 ) minLevelInInterface = std::min( minLevelInInterface, level );
+                                    }
+                                }
+
+                                if( minLevelInInterface != interface.min_level() ) { 
+                                    logs << "\t\t\t\t> [WARNING] Interface need to be update !" << std::endl;
+                                }
+                                
+                                logs << fmt::format("\t\t\t\t> Min level in interface : {}", minLevelInInterface ) << std::endl;
+
+                                // CellList_t cl_given;
+
+                                nbElementGiven = 0;
+
+                                auto interface_on_mesh = translate( interface[ interface.min_level() ], dir_from_neighbour * offset ); // interface is monolevel !
+                                for (size_t level = mesh.min_level(); level <= mesh.max_level(); ++level) {
+
+                                    size_t nCellsAtInterface = 0, nCellsAtInterfaceGiven = 0;                                                                
+                                    auto intersect = intersection( interface_on_mesh, mesh[ Mesh_t::mesh_id_t::cells ][ level ] ).on( level ); // need handle level difference here !
+
+                                    intersect( [&]( const auto & interval, const auto & index ){
+
+                                        for(size_t ii=0; ii<interval.size(); ++ii){
+                                            if( flags( level, interval, index )[ ii ] == world.rank() )
+                                            {
+                                                flags( level, interval, index )[ ii ] = neighbourhood[ neighbour_local_id ].rank;
+                                                nCellsAtInterfaceGiven += 1;
+
+                                                // cl_given[ level ][ index ].add_point( interval.start + ii );
+                                            }
+                                            nCellsAtInterface += 1;
+                                        }
+
+                                    });
+
+                                    logs << fmt::format("\t\t\t\t> At level {}, NCellsAtInterface : {}, NCellsAtInterfaceGiven : {}, nbElementGiven : {}", level,
+                                                        nCellsAtInterface, nCellsAtInterfaceGiven, nbElementGiven ) << std::endl;
+
+                                    nbElementGiven += nCellsAtInterfaceGiven;
+
+                                }
+
+                                // interface = { cl_given, false };
+
+                                new_fluxes[ neighbour_local_id ] += static_cast<int>( nbElementGiven );
+                                offset ++;
+                            }
+                            
+                        }
+                    
+                        if( new_fluxes[ neighbour_local_id ] < 0 ){
+                            logs << fmt::format("\t> Error cannot fullfill the neighbour # {}, fluxes: {} ", neighbourhood[ neighbour_local_id ].rank, new_fluxes[ neighbour_local_id ] ) << std::endl;
+                        }
+                        
+                    }
+
+                }
+
+                return flags;
+            }
+
+    };
+}
+#endif
diff --git a/include/samurai/load_balancing_diffusion_interval.hpp b/include/samurai/load_balancing_diffusion_interval.hpp
new file mode 100644
index 000000000..ca2a3acdb
--- /dev/null
+++ b/include/samurai/load_balancing_diffusion_interval.hpp
@@ -0,0 +1,459 @@
+#pragma once
+
+#include <map>
+#include "load_balancing.hpp"
+
+#ifdef SAMURAI_WITH_MP
+template<size_t dim>
+class Diffusion_LoadBalancer_interval : public samurai::LoadBalancer<Diffusion_LoadBalancer_interval<dim>> {
+
+    using Coord_t    = xt::xtensor_fixed<double, xt::xshape<dim>>;
+
+    private:
+        int _ndomains;
+        int _rank;
+
+        /**
+         * Compute the barycenter of the current domain based on mid-center of intervals.
+         * Weighted by level.
+         * 
+        */
+        template<class Mesh_t>
+        Coord_t _cmpIntervalBarycenter( Mesh_t & mesh ) { // compute barycenter of current domains ( here, all domains since with simulate multiple MPI domains)
+            
+            Coord_t bary;
+            bary.fill( 0. );
+
+            double wght_tot = 0.;
+            samurai::for_each_interval( mesh, 
+                                        [&]( std::size_t level, const auto& interval, const auto& index ) {
+                
+                // [OPTIMIZATION] precompute weight as array
+                // double wght = 1. / ( 1 << ( mesh.max_level() - level ) );
+                constexpr double wght = 1.;
+
+                Coord_t mid = _getIntervalMidPoint( level, interval, index );
+
+                for(size_t idim=0; idim<dim; ++idim ){
+                    bary( idim ) += mid( idim ) * wght;
+                }
+
+                wght_tot += wght;
+
+            });
+
+            wght_tot = std::max( wght_tot, 1e-12 );
+            for( size_t idim=0; idim<dim; ++idim ){
+                bary( idim ) /= wght_tot;
+            }
+
+            return bary;
+
+        }
+
+        template<class Interval_t, class Index_t>
+        inline Coord_t _getIntervalMidPoint( size_t level, const Interval_t & interval, 
+                                             const Index_t & index ) const {
+            Coord_t mid;
+            double csize = samurai::cell_length( level );
+
+            mid( 0 ) = ( (interval.end - interval.start) * 0.5 + interval.start ) * csize ;
+            for( size_t idim=0; idim<dim-1; ++idim ){
+                mid( idim + 1 ) = ( index( idim ) * csize ) + csize * 0.5;
+            }
+
+            return mid;
+
+        }
+
+    public:
+
+        Diffusion_LoadBalancer_interval() {
+
+#ifdef SAMURAI_WITH_MPI
+            boost::mpi::communicator world;
+            _ndomains = world.size();
+            _rank     = world.rank();
+#else
+            _ndomains = 1;
+            _rank     = 0;
+#endif
+        }
+
+        inline std::string getName() const { return "Interface_Prop_LB"; } 
+
+        template<class Mesh_t>
+        Mesh_t load_balance_impl( Mesh_t & mesh ){
+
+            using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+            using CellList_t      = typename Mesh_t::cl_type;
+            using CellArray_t     = samurai::CellArray<Mesh_t::dim>;
+            using mesh_id_t       = typename Mesh_t::mesh_id_t;
+
+            boost::mpi::communicator world;
+
+            // For debug
+            std::ofstream logs; 
+            logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app );
+            logs << "# New load balancing" << std::endl;
+
+            // compute fluxes in terms of number of intervals to transfer/receive
+            std::vector<int> fluxes = samurai::cmptFluxes<samurai::BalanceElement_t::INTERVAL>( mesh );
+            std::vector<int> new_fluxes( fluxes );
+
+            // get loads from everyone
+            std::vector<int> loads;
+            int my_load = static_cast<int>( samurai::cmptLoad<samurai::BalanceElement_t::INTERVAL>( mesh ) );
+            boost::mpi::all_gather( world, my_load, loads );
+
+            std::vector<mpi_subdomain_t> & neighbourhood = mesh.mpi_neighbourhood();
+            size_t n_neighbours = neighbourhood.size();
+
+            { // some debug info
+                logs << "load : " << my_load << std::endl;
+                logs << "nneighbours : " << n_neighbours << std::endl;
+                logs << "neighbours : ";
+                for( size_t in=0; in<neighbourhood.size(); ++in )
+                    logs << neighbourhood[ in ].rank << ", ";
+                logs << std::endl << "fluxes : ";
+                for( size_t in=0; in<neighbourhood.size(); ++in )
+                    logs << fluxes[ in ] << ", ";
+                logs << std::endl;
+            }
+
+            std::vector<CellList_t> cl_to_send( n_neighbours );
+
+            bool balancing_done = false;
+            while( ! balancing_done ){
+             
+                // select neighbour with the highest needs of load
+                bool neighbour_found = false;
+                std::size_t requester   = 0;
+                int requested_load = 0;
+                for(std::size_t nbi=0; nbi<n_neighbours; ++nbi ){
+
+                    // skip neighbour that need to send load
+                    if( new_fluxes[ nbi ] >= 0 ) continue;
+
+                    // FIX: Add condition (&& interface not empty ) ?
+                    // Neighbourhood should have been updated in a way that an interface exists !
+                    if( - new_fluxes[ nbi ] > requested_load ){
+                        requested_load  = - new_fluxes[ nbi ];
+                        requester       = nbi;
+                        neighbour_found = true;
+                    }
+                }
+
+                if( ! neighbour_found ){
+                    { // debug 
+                        logs << "No more neighbour found " << std::endl;
+                    }
+
+                    balancing_done = true;
+                    break;
+                }
+
+                { // debug 
+                    logs << "Requester neighbour : " << neighbourhood[ requester ].rank << ", fluxes : " << requested_load << std::endl;
+                }
+
+                // select interval for this neighbour by moving in cartesian direction by one 
+                auto interface = samurai::cmptInterface<Mesh_t::dim, samurai::Direction_t::FACE>( mesh, neighbourhood[ requester ].mesh );
+
+                { // check emptyness of interface, if it is empty, then set fluxes for this neighbour to 0
+                    size_t nelement = 0;
+                    samurai::for_each_interval( interface, [&]( [[maybe_unused]] std::size_t level, [[maybe_unused]] const auto & interval, 
+                                                                [[maybe_unused]] const auto & index ){
+                        nelement += 1;
+                    });
+
+                    if( nelement == 0 ){
+                        new_fluxes[ requester ] = 0;
+
+                        { // debug
+                            std::cerr << fmt::format("\t> Process {}, Warning no interface with a requester neighbour # {}", world.rank(),
+                                                     neighbourhood[ requester ].rank ) << std::endl;
+                            logs << "Requester neighbour, no interface found, set fluxes to " << new_fluxes[ requester ] << std::endl;
+                        }
+                    }else{
+                        { // debug 
+                            logs << "Requester neighbour, interface  " << nelement << " intervals " << std::endl;
+                        }
+                    }
+                }
+
+                // go through interval on the interface and add as much as possible
+                // skip this to add the whole interface
+                CellList_t cl_for_neighbour;
+
+                size_t nbIntervalAdded = 0;
+                samurai::for_each_interval( interface, [&]( std::size_t level, const auto & interval, const auto & index ){
+
+                    if( new_fluxes[ requester ] < 0 ){
+                        cl_for_neighbour[ level ][ index ].add_interval( interval );
+                        // new_fluxes[ requester ] += 1; // here interval load == 1, no weight;
+                        nbIntervalAdded += 1;
+                    }
+
+                });
+
+                new_fluxes[ requester ] += nbIntervalAdded;
+
+                { // remove interval from current process mesh and add it to neighbour mesh local copy !
+                    CellArray_t ca_for_neighbour = { cl_for_neighbour, false };
+                    
+                    // mesh.remove( ca_for_neighbour );
+                    // neighbourhood[ requester ].mesh.merge( ca_for_neighbour );
+
+                    // update gobal list that will be sent to neighbour process
+                    samurai::for_each_interval( ca_for_neighbour, [&]( std::size_t level, const auto & interval, const auto & index ){
+                        cl_to_send[ requester ][ level ][ index ].add_interval( interval );
+                    });
+                }
+
+                { // debug 
+                    logs << "New flux for this neighbour : " << new_fluxes[ requester ] << std::endl;
+                }
+
+            }
+
+            /* ---------------------------------------------------------------------------------------------------------- */
+            /* ------- Data transfer between processes ------------------------------------------------------------------ */ 
+            /* ---------------------------------------------------------------------------------------------------------- */
+            
+            CellList_t new_cl, need_remove;
+
+            // at this point local mesh of neighbour are modified ( technically it should match what would results from send)
+            // and local mesh has been modified
+            for(std::size_t ni=0; ni<n_neighbours; ++ni ){
+                
+                if( fluxes [ ni ] == 0 ) continue; 
+
+                if( fluxes[ ni ] > 0 ) { // receive data
+                    CellArray_t to_rcv;
+                    world.recv( neighbourhood[ ni ].rank, 42, to_rcv );
+
+                    logs << fmt::format("\t>[load_balance_impl]::interval Rank # {} receiving {} cells from rank # {}", world.rank(), to_rcv.nb_cells(), neighbourhood[ ni ].rank) << std::endl;
+                    
+                    // old strategy
+                    // mesh.merge( to_rcv );
+
+                    // add to future mesh of current process
+                    samurai::for_each_interval(to_rcv,
+                             [&](std::size_t level, const auto& interval, const auto& index)
+                             {
+                                new_cl[ level ][ index ].add_interval( interval );
+                             });
+
+                }else{ // send data to
+                    CellArray_t to_send = { cl_to_send[ ni ], false };
+                    world.send( neighbourhood[ ni ].rank, 42, to_send );
+
+                    logs << fmt::format("\t>[load_balance_impl]::interval Rank # {} sending {} cells to rank # {}", world.rank(), to_send.nb_cells(), neighbourhood[ ni ].rank) << std::endl;
+
+                    samurai::for_each_interval( to_send,
+                            [&](std::size_t level, const auto& interval, const auto& index)
+                            {
+                                need_remove[ level ][ index ].add_interval( interval );
+                            });
+                }
+
+            }
+
+            /* ---------------------------------------------------------------------------------------------------------- */
+            /* ------- Construct new mesh for current process ----------------------------------------------------------- */ 
+            /* ---------------------------------------------------------------------------------------------------------- */
+
+            // add to new_cl interval that were not sent
+            CellArray_t need_remove_ca = { need_remove }; // to optimize
+            for( size_t level=mesh.min_level(); level<=mesh.max_level(); ++level ){
+                auto diff = samurai::difference( mesh[ mesh_id_t::cells ][ level ], need_remove_ca[ level ] );
+
+                diff([&]( auto & interval, auto & index ){
+                    new_cl[ level ][ index ].add_interval( interval );
+                });
+            }
+
+            Mesh_t new_mesh( new_cl, mesh );
+
+            return new_mesh;
+        }
+
+        template<class Mesh_t>
+        [[deprecated]]
+        void load_balance_impl2( Mesh_t & mesh ){
+
+            using interval_t      = typename Mesh_t::interval_t;
+            using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+            using CellList_t      = typename Mesh_t::cl_type;
+            using CellArray_t     = samurai::CellArray<Mesh_t::dim>;
+            using mesh_id_t       = typename Mesh_t::mesh_id_t;
+
+            boost::mpi::communicator world;
+
+            std::ofstream logs;
+            // DEBUG 
+            logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app );
+            logs << "# New load balancing" << std::endl;
+
+            std::string smt = "# " + std::to_string( _rank ) + " [Diffusion_LoadBalancer_interval]::load_balance_impl ";
+            SAMURAI_TRACE( smt + "Entering function ..." );
+
+            // give access to rank & mesh of neighbour
+            std::vector<mpi_subdomain_t> & neighbourhood = mesh.mpi_neighbourhood();
+            size_t n_neighbours = neighbourhood.size();
+
+            // get the load to neighbours (geometrical neighbour)
+            std::vector<int> fluxes = samurai::cmptFluxes<samurai::BalanceElement_t::INTERVAL>( mesh );
+
+            {
+                logs << "load : " << samurai::cmptLoad<samurai::BalanceElement_t::INTERVAL>( mesh ) << std::endl;
+                logs << "nneighbours : " << n_neighbours << std::endl;
+                logs << "neighbours : ";
+                for( size_t in=0; in<neighbourhood.size(); ++in )
+                    logs << neighbourhood[ in ].rank << ", ";
+                logs << std::endl << "fluxes : ";
+                for( size_t in=0; in<neighbourhood.size(); ++in )
+                    logs << fluxes[ in ] << ", ";
+                logs << std::endl;
+            }
+
+            // pour échanger les intervalles, calcul des barycentres des cellules d'interfaces et pas le barycentre des
+            // domaines en eux mêmes !
+
+            // Interface for each neighbour as cell_array
+            auto interface = samurai::_computeCartesianInterface<Mesh_t::dim, samurai::Direction_t::FACE>( mesh );
+
+            // compute some point of reference in mesh and interval-based interface
+            // Coord_t barycenter = _cmpIntervalBarycenter( mesh[ mesh_id_t::cells ] );
+            Coord_t barycenter = samurai::_cmpCellBarycenter<Mesh_t::dim>( mesh[ mesh_id_t::cells ] );
+            logs << "Domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl;
+
+            std::vector<double> invloads;
+            double my_load = static_cast<double>( samurai::cmptLoad<samurai::BalanceElement_t::INTERVAL>( mesh ) );
+            boost::mpi::all_gather( world, my_load, invloads );
+            for(size_t il=0; il<invloads.size(); ++il ){
+                invloads[ il ] = 1. / invloads[ il ];
+            }
+
+            // std::vector<Coord_t> barycenter_interface_neighbours( n_neighbours );
+            std::vector<Coord_t> barycenter_neighbours( n_neighbours );
+
+            for(size_t nbi=0; nbi<n_neighbours; ++nbi ){
+                // barycenter_interface_neighbours[ nbi ] = _cmpIntervalBarycenter( interface[ nbi ] );
+                // barycenter_interface_neighbours[ nbi ] = _cmpCellBarycenter<dim>( interface[ nbi ] );
+                barycenter_neighbours[ nbi ] = samurai::_cmpCellBarycenter<Mesh_t::dim>( neighbourhood[ nbi ].mesh[ mesh_id_t::cells ] );
+
+                // debug
+                // auto s_ = fmt::format( "Barycenter neighbour : ({}, {})", 
+                //                barycenter_interface_neighbours[ nbi ]( 0 ),
+                //                barycenter_interface_neighbours[ nbi ]( 1 ) );
+
+                // logs << s_ << std::endl;
+            }
+
+            struct Data {
+                size_t level;
+                interval_t interval;
+                xt::xtensor_fixed<samurai::default_config::value_t, xt::xshape<Mesh_t::dim - 1>> indices;
+                int rank;
+            };
+
+            // build map of interval that needs to be sent
+            std::multimap<double, Data> repartition;
+
+            constexpr auto fdist = samurai::Distance_t::GRAVITY;
+
+            for_each_interval( mesh[ Mesh_t::mesh_id_t::cells ],
+                               [&]( std::size_t level, const auto& interval, const auto& index ){
+
+                // cartesian coordinates
+                auto mid_point = _getIntervalMidPoint( level, interval, index );
+
+                // process that might get the interval
+                int winner_id       = -1;
+                // double winner_dist = std::numeric_limits<double>::max();
+                double winner_dist = samurai::getDistance<Mesh_t::dim, fdist>( mid_point, barycenter ) * invloads[ _rank ];
+
+                // select the neighbour
+                for( size_t ni=0; ni<n_neighbours; ++ni ){ // for each neighbour
+
+                    auto neighbour_rank = neighbourhood[ ni ].rank;
+
+                    if( fluxes[ ni ] >= 0 ) continue; // skip neighbour that will recv
+
+                    // this might fix ilots but require neighbour update
+                    // double dist = std::min( distance_inf<dim>( mid_point, barycenter_interface_neighbours[ ni ] ),
+                    //                         distance_inf<dim>( mid_point, barycenter_neighbours[ ni ] ) );
+
+                    double dist = samurai::getDistance<Mesh_t::dim, fdist>( mid_point, barycenter_neighbours[ ni ] ) * invloads[ neighbour_rank ] ;
+                    // double dist = samurai::distance_inf<Mesh_t::dim>( mid_point, barycenter_interface_neighbours[ ni ] );
+
+                    if( dist < winner_dist ){
+                        winner_id   = ni;
+                        winner_dist = dist;
+                    }
+                    
+                }
+
+                if( winner_id >= 0 ){
+
+                    if( repartition.find( winner_dist ) != repartition.find( winner_dist ) ){
+                        std::cerr << "\t> WARNING: Key conflict for std::map !" << std::endl;
+                    }
+
+                    repartition.insert ( std::pair<double, Data>( winner_dist, Data { level, interval, index, winner_id } ) );
+
+                }
+
+            });
+
+            std::vector<CellList_t> cl_to_send( n_neighbours );
+            std::vector<CellArray_t> ca_to_send( n_neighbours );
+
+            // distribute intervals based on ordered distance to neighbours
+            // rank is not the rank but the offset in the neighbourhood
+            std::vector<int> given_ ( n_neighbours, 0 );
+            // for( auto & it : repartition ){
+            for( auto it = repartition.begin(); it != repartition.end(); it++ ){
+
+                auto nrank = neighbourhood[ it->second.rank ].rank;
+                auto lvl   = it->second.level;
+
+                logs << fmt::format("\t> Interval {} --> to rank {} ( level : {} , dist : {}", it->second.interval, nrank, lvl, it->first) << std::endl;
+
+                // shouldn't we give it to the second closest neighbour ?!
+                if( given_[ it->second.rank ] + it->second.interval.size() <= ( - fluxes[ it->second.rank ] ) ){
+                    cl_to_send[ it->second.rank ][ it->second.level ][ it->second.indices ].add_interval( it->second.interval );
+                    given_[ it->second.rank ] += it->second.interval.size();
+                }
+
+            }
+
+            for(size_t nbi=0; nbi<n_neighbours; ++nbi ) {
+                ca_to_send[ nbi ] = { cl_to_send[ nbi ], false };
+                logs << "\t\t> Number of cells to send to process # " << neighbourhood[ nbi ].rank
+                          << " : " << ca_to_send[ nbi ].nb_cells() << std::endl;
+            }
+
+            // actual data transfer occurs here 
+            for(size_t ni=0; ni<n_neighbours; ++ni ){
+                
+                if( fluxes [ ni ] == 0 ) continue; 
+
+                if( fluxes[ ni ] > 0 ) { // receive data
+                    samurai::CellArray<Mesh_t::dim> to_rcv;
+                    world.recv( neighbourhood[ ni ].rank, 42, to_rcv );
+                    mesh.merge( to_rcv );
+                }else{ // send data to 
+                    world.send( neighbourhood[ ni ].rank, 42, ca_to_send[ ni ] );
+                    mesh.remove( ca_to_send[ ni ] );
+                }
+            }
+
+            // update neighbour mesh
+            mesh.update_mesh_neighbour();
+        }
+
+};
+#endif
diff --git a/include/samurai/load_balancing_force.hpp b/include/samurai/load_balancing_force.hpp
new file mode 100644
index 000000000..7d9ff71ef
--- /dev/null
+++ b/include/samurai/load_balancing_force.hpp
@@ -0,0 +1,511 @@
+#pragma once
+
+#include <map>
+#include "load_balancing.hpp"
+
+#ifdef SAMURAI_WITH_MPI
+namespace Load_balancing{
+
+    class GlobalCriteria : public samurai::LoadBalancer<GlobalCriteria> {
+
+        private:
+            int _ndomains;
+            int _rank;
+
+            const double _unbalance_threshold = 0.05; // 5 %
+
+            template<class Mesh_t>
+            double getSurfaceOrVolume( Mesh_t & mesh ) const {
+
+                double s_ = 0.;
+                for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){
+                    
+                    double s = 1.;
+
+                    for(size_t idim=0; idim<Mesh_t::dim; ++idim){
+                    s *= samurai::cell_length( cell.level );
+                    }
+
+                    s_ += s;
+                });
+
+                return s_;
+            }
+
+        public:
+
+            GlobalCriteria() {
+#ifdef SAMURAI_WITH_MPI
+                boost::mpi::communicator world;
+                _ndomains = world.size();
+                _rank     = world.rank();
+#else
+                _ndomains = 1;
+                _rank     = 0;
+#endif
+            }
+
+            inline std::string getName() const { return "Gravity_LB"; } 
+
+            template <class Mesh_t>
+            bool require_balance_impl( Mesh_t & mesh ) {
+
+                boost::mpi::communicator world;
+
+                // logs << fmt::format("\n# [SFC_LoadBalancer_interval::Morton] required_balance_impl ") << std::endl;
+
+                double nbCells_tot = 0;
+                std::vector<double> nbCellsPerProc;
+                boost::mpi::all_gather( world, static_cast<double>( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) ), nbCellsPerProc );
+
+                for(size_t ip=0; ip<nbCellsPerProc.size(); ++ip ) {
+                    nbCells_tot += nbCellsPerProc[ ip ];
+                }
+
+                // no weight while computing load 
+                double dc = nbCells_tot  / static_cast<double> ( world.size() );
+
+                for(size_t ip=0; ip<nbCellsPerProc.size(); ++ip ) {
+                    double diff = std::abs( nbCellsPerProc[ ip ] - dc ) / dc;
+
+                    if( diff > _unbalance_threshold ) return true;
+                }
+
+                return false;
+            }
+
+            template<class Mesh_t>
+            auto reordering_impl( Mesh_t & mesh ){
+                auto flags = samurai::make_field<int, 1>("rank", mesh);
+                flags.fill( _rank );
+                return flags;
+            }
+
+            template<class Mesh_t>
+            auto load_balance_impl( Mesh_t & mesh ){
+
+                using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+                using Coord_t = xt::xtensor_fixed<double, xt::xshape<Mesh_t::dim>>;
+
+                boost::mpi::communicator world;
+
+                // For debug purpose
+                std::ofstream logs;
+                logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app );
+                logs << "# New load balancing" << std::endl;
+
+                // give access to rank & mesh of neighbour
+                std::vector<mpi_subdomain_t> & neighbourhood = mesh.mpi_neighbourhood();
+                
+                std::size_t n_neighbours = neighbourhood.size();
+
+                std::vector<double> loads;
+                double my_load = static_cast<double>( samurai::cmptLoad<samurai::BalanceElement_t::CELL>( mesh ) );
+                boost::mpi::all_gather( world, my_load, loads );
+
+                // get the load to neighbours (geometrical neighbour) with 5 iterations max
+                std::vector<int> fluxes = samurai::cmptFluxes<samurai::BalanceElement_t::CELL>( mesh, 1 );
+
+                {
+                    logs << "load : " << my_load << std::endl;
+                    logs << "nneighbours : " << n_neighbours << std::endl;
+                    logs << "neighbours : ";
+                    for( size_t in=0; in<neighbourhood.size(); ++in )
+                        logs << neighbourhood[ in ].rank << ", ";
+                    logs << std::endl << "fluxes : ";
+                    for( size_t in=0; in<neighbourhood.size(); ++in )
+                        logs << fluxes[ in ] << ", ";
+                    logs << std::endl;
+                }
+
+                // Interface for each neighbour: std::vector<Cell_array_t>
+                auto interface = samurai::_computeCartesianInterface<Mesh_t::dim, samurai::Direction_t::FACE_AND_DIAG>( mesh );
+
+                // Invalidate fluxes with non adjacents neighbours, should not happen or might happen if load balancing exchange does not consider some
+                // direction.
+                std::vector<size_t> ncells_interface ( n_neighbours, 0 );
+                for(size_t ni=0; ni<n_neighbours; ni++){
+                    samurai::for_each_interval( interface[ ni ], [&]([[maybe_unused]] std::size_t level, [[maybe_unused]] const auto & interval, 
+                                                        [[maybe_unused]] const auto & index ){
+                        ncells_interface[ ni ] ++;
+                    });
+
+                    if( ncells_interface[ ni ] == 0 ) fluxes[ ni ] = 0;
+                }
+
+                // bary center of current mesh - no weight on cells
+                // Coord_t barycenter = _cmpIntervalBarycenter( mesh[ mesh_id_t::cells ] );
+                Coord_t barycenter = samurai::_cmpCellBarycenter<Mesh_t::dim>( mesh );
+                logs << "Domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl;
+
+                // std::vector<Coord_t> barycenter_interface_neighbours( n_neighbours );
+                std::vector<Coord_t> barycenter_neighbours( n_neighbours );
+                std::vector<double> sv( n_neighbours );
+
+                for(size_t nbi=0; nbi<n_neighbours; ++nbi ){
+                    // barycenter_interface_neighbours[ nbi ] = _cmpIntervalBarycenter( interface[ nbi ] );
+                    // barycenter_interface_neighbours[ nbi ] = _cmpCellBarycenter<Mesh_t::dim>( interface[ nbi ] );
+                    barycenter_neighbours[ nbi ] = samurai::_cmpCellBarycenter<Mesh_t::dim>( neighbourhood[ nbi ].mesh );
+                    
+                    // surface or volume depending on Mesh_t::dim
+                    sv[ nbi ] = getSurfaceOrVolume( neighbourhood[ nbi ].mesh );
+
+                    // debug
+                    auto s_ = fmt::format( "Barycenter neighbour # {}: ({}, {})", neighbourhood[ nbi ].rank,
+                                barycenter_neighbours[ nbi ]( 0 ),
+                                barycenter_neighbours[ nbi ]( 1 ) );
+
+                    logs << s_ << std::endl;
+                }
+
+                // build map of interval that needs to be sent. Warning, it does not work with classical std::map !!
+                auto flags = samurai::make_field<int, 1>("rank", mesh);
+                flags.fill( world.rank() );
+
+                constexpr auto fdist = samurai::Distance_t::L1;
+
+                double currentSV = getSurfaceOrVolume( mesh );
+
+                std::vector<int> mload( static_cast<size_t>( world.size() ), 0 );
+                for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){
+
+                    auto cc = cell.center();
+
+                    // process that might get the interval
+                    int winner_id = -1;
+
+                    // double winner_dist = std::numeric_limits<double>::max();
+                    // double winner_dist = samurai::getDistance<dim, fdist>( cell, barycenter ) / loads[ world.rank() ];
+                    
+                    // double coeff_current = currentSV / loads[ static_cast<size_t>( world.rank() ) ];
+                    double coeff_current = 1.; //  loads[ static_cast<size_t>( world.rank() ) ];
+                    double winner_dist = samurai::getDistance<Mesh_t::dim, fdist>( cc, barycenter ) * coeff_current;
+
+                    // logs << fmt::format("\t\t> Cell : ({},{}), current mesh dist : {}", cc(0), cc(1), winner_dist ) << std::endl;
+
+                    // select the neighbour
+                    for( std::size_t ni=0; ni<n_neighbours; ++ni ){ // for each neighbour
+
+                        auto neighbour_rank = static_cast<std::size_t>( neighbourhood[ ni ].rank );
+
+                        if( fluxes[ ni ] >= 0 ) continue; // skip neighbour that will recv
+
+                        // this might fix ilots but require neighbour update
+                        // double dist = std::min( distance_inf<dim>( mid_point, barycenter_interface_neighbours[ ni ] ),
+                        //                         distance_inf<dim>( mid_point, barycenter_neighbours[ ni ] ) );
+
+                        // double dist = samurai::getDistance<dim, fdist>( cell, barycenter_interface_neighbours[ ni ] ) / loads[ neighbour_rank ];
+                        // double coeff = sv[ ni ] / ( loads[ neighbour_rank ] ) ; // / sv[ ni ];
+                        double coeff = 1.; // loads[ neighbour_rank ] ; // mload[ neighbour_rank ];
+                        double dist = samurai::getDistance<Mesh_t::dim, fdist>( cell.center(), barycenter_neighbours[ ni ] ) * coeff;
+
+                        // logs << fmt::format("\t\t\t> Dist to neighbour {} : {}", neighbour_rank, dist ) << std::endl;
+
+                        if( dist < winner_dist && ncells_interface[ ni ] > 0 ){
+                            winner_id   = static_cast<int>( ni );
+                            winner_dist = dist;
+
+                            mload[ neighbour_rank ] += 1;
+
+                            // if( mload[ neighbour_rank ] >= ( - fluxes[ ni ] ) ) fluxes[ ni ] = 0;
+                        }
+                        
+                    }
+
+                    // logs << fmt::format("\t\t\t> Cell given to process #{}", neighbourhood[ static_cast<size_t>( winner_id ) ].rank ) << std::endl;
+
+                    if( winner_id >= 0 ){
+                        assert( winner_id < neighbourhood.size() );
+                        flags[ cell ] = neighbourhood[ static_cast<size_t>( winner_id ) ].rank; 
+                    }
+
+                });
+
+                return flags;
+            }
+
+            template <size_t dim, samurai::Weight w=samurai::Weight::None, class Mesh_t, class Field_t>
+            xt::xtensor_fixed<double, xt::xshape<dim>> _barycenterWithFlags( const Mesh_t & mesh, const Field_t & flags )
+            {
+                using Coord_t = xt::xtensor_fixed<double, xt::xshape<dim>>;
+
+                Coord_t bary;
+                bary.fill(0.);
+
+                double wght_tot = 0.;
+                samurai::for_each_cell(mesh, [&, this]( const auto & cell ) {
+                    double wght = 1.;
+                    
+                    if constexpr( w == samurai::Weight::OnSmall ){
+                        wght = 1. / static_cast<double>( 1 << (mesh.max_level() - cell.level) );
+                    }
+
+                    if constexpr( w == samurai::Weight::OnLarge ){
+                        wght = 1. / static_cast<double>( 1 << cell.level );
+                    }
+
+                    if( flags[ cell ] == this->_rank ) {
+                        auto cc = cell.center();
+
+                        for (size_t idim = 0; idim < dim; ++idim)
+                        {
+                            bary(idim) += cc(idim) * wght;
+                        }
+
+                        wght_tot += wght;
+                    }
+
+                });
+
+                wght_tot = std::max(wght_tot, 1e-12);
+                for (size_t idim = 0; idim < dim; ++idim)
+                {
+                    bary(idim) /= wght_tot;
+                }
+
+                return bary;
+            }
+
+            template <size_t dim, samurai::Weight w=samurai::Weight::None, class Mesh_t, class Field_t>
+            xt::xtensor_fixed<double, xt::xshape<dim>> _barycenterWithMeshWithFlags( const Mesh_t & mesh, const Field_t & flags, int rank, const Mesh_t & otherMesh )
+            {
+                using Coord_t = xt::xtensor_fixed<double, xt::xshape<dim>>;
+
+                Coord_t bary;
+                bary.fill(0.);
+
+                double wght_tot = 0.;
+                samurai::for_each_cell(mesh, [&]( const auto & cell ) {
+                    double wght = 1.;
+                    
+                    if constexpr( w == samurai::Weight::OnSmall ){
+                        wght = 1. / static_cast<double>( 1 << (mesh.max_level() - cell.level) );
+                    }
+
+                    if constexpr( w == samurai::Weight::OnLarge ){
+                        wght = 1. / static_cast<double>( 1 << cell.level );
+                    }
+
+                    if( flags[ cell ] == rank ) {
+                        auto cc = cell.center();
+
+                        for (size_t idim = 0; idim < dim; ++idim)
+                        {
+                            bary(idim) += cc(idim) * wght;
+                        }
+
+                        wght_tot += wght;
+                    }
+
+                });
+
+                samurai::for_each_cell(otherMesh, [&]( const auto & cell ) {
+                    double wght = 1.;
+                    
+                    if constexpr( w == samurai::Weight::OnSmall ){
+                        wght = 1. / static_cast<double>( 1 << (mesh.max_level() - cell.level) );
+                    }
+
+                    if constexpr( w == samurai::Weight::OnLarge ){
+                        wght = 1. / static_cast<double>( 1 << cell.level );
+                    }
+
+                    auto cc = cell.center();
+
+                    for (size_t idim = 0; idim < dim; ++idim)
+                    {
+                        bary(idim) += cc(idim) * wght;
+                    }
+
+                    wght_tot += wght;
+
+                });
+
+                wght_tot = std::max(wght_tot, 1e-12);
+                for (size_t idim = 0; idim < dim; ++idim)
+                {
+                    bary(idim) /= wght_tot;
+                }
+
+                return bary;
+            }
+
+            template<class Mesh_t>
+            auto load_balance_impl2( Mesh_t & mesh ){
+
+                using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+                using Coord_t = xt::xtensor_fixed<double, xt::xshape<Mesh_t::dim>>;
+
+                boost::mpi::communicator world;
+
+                logs << "# New load balancing" << std::endl;
+
+                // build map of interval that needs to be sent. Warning, it does not work with classical std::map !!
+                auto flags = samurai::make_field<int, 1>("rank", mesh);
+                flags.fill( world.rank() );
+
+                // give access to rank & mesh of neighbour
+                std::vector<mpi_subdomain_t> & neighbourhood = mesh.mpi_neighbourhood();
+                
+                std::size_t n_neighbours = neighbourhood.size();
+
+                std::vector<double> loads;
+                double my_load = static_cast<double>( samurai::cmptLoad<samurai::BalanceElement_t::CELL>( mesh ) );
+                boost::mpi::all_gather( world, my_load, loads );
+
+                // Compute fluxes based on neighbourdhood graph
+                std::vector<int> fluxes = samurai::cmptFluxes<samurai::BalanceElement_t::CELL>( mesh, 1 );
+
+                {
+                    logs << "load : " << my_load << std::endl;
+                    logs << "nneighbours : " << n_neighbours << std::endl;
+                    logs << "neighbours : ";
+                    for( size_t in=0; in<neighbourhood.size(); ++in )
+                        logs << neighbourhood[ in ].rank << ", ";
+                    logs << std::endl << "fluxes : ";
+                    for( size_t in=0; in<neighbourhood.size(); ++in )
+                        logs << fluxes[ in ] << ", ";
+                    logs << std::endl;
+                }
+
+                // Interface for each neighbour: std::vector<Cell_array_t>
+                auto interface = samurai::_computeCartesianInterface<Mesh_t::dim, samurai::Direction_t::FACE_AND_DIAG>( mesh );
+
+                // Invalidate fluxes with non adjacents neighbours, should not happen or might happen if load balancing exchange does not consider some
+                // direction or neighbourdhood is overkill (all meshes for example)
+                std::vector<size_t> ncells_interface ( n_neighbours, 0 );
+                for(size_t ni=0; ni<n_neighbours; ni++){
+                    samurai::for_each_interval( interface[ ni ], [&]([[maybe_unused]] std::size_t level, [[maybe_unused]] const auto & interval, 
+                                                        [[maybe_unused]] const auto & index ){
+                        ncells_interface[ ni ] ++;
+                    });
+
+                    if( ncells_interface[ ni ] == 0 ) fluxes[ ni ] = 0;
+                }
+
+                // bary center of current mesh - no weight on cells
+                // Coord_t barycenter = _cmpIntervalBarycenter( mesh[ mesh_id_t::cells ] );
+                Coord_t barycenter = _barycenterWithFlags<Mesh_t::dim>( mesh, flags );
+                logs << "Domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl;
+
+                // std::vector<Coord_t> barycenter_interface_neighbours( n_neighbours );
+                std::vector<Coord_t> barycenter_neighbours( n_neighbours );
+
+                for(size_t nbi=0; nbi<n_neighbours; ++nbi ){
+                    barycenter_neighbours[ nbi ] = _barycenterWithMeshWithFlags<Mesh_t::dim>( mesh, flags, neighbourhood[ nbi ].rank, neighbourhood[ nbi ].mesh );
+
+                    // debug
+                    auto s_ = fmt::format( "Barycenter neighbour # {}: ({}, {})", neighbourhood[ nbi ].rank,
+                                barycenter_neighbours[ nbi ]( 0 ),
+                                barycenter_neighbours[ nbi ]( 1 ) );
+
+                    logs << s_ << std::endl;
+                }
+
+                constexpr auto fdist = samurai::Distance_t::L1;
+
+                double currentSV = getSurfaceOrVolume( mesh );
+
+                std::vector<int> mload( static_cast<size_t>( world.size() ), 0 );
+                for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){
+
+                    auto cc = cell.center();
+
+                    // process that might get the interval
+                    int winner_id = -1;                   
+                    double winner_dist = samurai::getDistance<Mesh_t::dim, fdist>( cc, barycenter ) / currentSV;
+
+                    // logs << fmt::format("\t\t> Cell : ({},{}), current mesh dist : {}", cc(0), cc(1), winner_dist ) << std::endl;
+
+                    // select the neighbour
+                    for( std::size_t ni=0; ni<n_neighbours; ++ni ){ // for each neighbour
+
+                        auto neighbour_rank = static_cast<std::size_t>( neighbourhood[ ni ].rank );
+
+                        if( fluxes[ ni ] >= 0 ) continue; // skip neighbour that will recv
+
+                        double dist = samurai::getDistance<Mesh_t::dim, fdist>( cell.center(), barycenter_neighbours[ ni ] );
+
+                        // logs << fmt::format("\t\t\t> Dist to neighbour {} : {}", neighbour_rank, dist ) << std::endl;
+
+                        if( dist < winner_dist && ncells_interface[ ni ] > 0 ){
+                            winner_id   = static_cast<int>( ni );
+                            winner_dist = dist;
+                        }
+                        
+                    }
+
+                    if( winner_id >= 0 ){
+                        assert( winner_id < neighbourhood.size() );
+                        flags[ cell ] = neighbourhood[ static_cast<size_t>( winner_id ) ].rank;
+
+                        mload[ neighbourhood[ winner_id ].rank ] ++;
+                    }
+
+                });
+
+                // update barycenter
+                barycenter = _barycenterWithFlags<Mesh_t::dim>( mesh, flags );
+                logs << "Updated domain barycenter : " << fmt::format( " barycenter : ({}, {})", barycenter(0), barycenter(1) ) << std::endl;
+                for(size_t nbi=0; nbi<n_neighbours; ++nbi ){
+                    barycenter_neighbours[ nbi ] = _barycenterWithMeshWithFlags<Mesh_t::dim>( mesh, flags, neighbourhood[ nbi ].rank, neighbourhood[ nbi ].mesh );
+                    
+                    // debug
+                    auto s_ = fmt::format( "Updated Barycenter neighbour # {}: ({}, {})", neighbourhood[ nbi ].rank,
+                                barycenter_neighbours[ nbi ]( 0 ),
+                                barycenter_neighbours[ nbi ]( 1 ) );
+
+                    logs << s_ << std::endl;
+                }
+
+                // second pass
+                for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto& cell ){
+
+                    auto cc = cell.center();
+
+                    // process that might get the interval
+                    int winner_id = -1;
+                    double winner_dist = samurai::getDistance<Mesh_t::dim, fdist>( cc, barycenter ) / currentSV;
+
+                    // logs << fmt::format("\t\t> Cell : ({},{}), current mesh dist : {}", cc(0), cc(1), winner_dist ) << std::endl;
+
+                    // select the neighbour
+                    for( std::size_t ni=0; ni<n_neighbours; ++ni ){ // for each neighbour
+
+                        auto neighbour_rank = static_cast<std::size_t>( neighbourhood[ ni ].rank );
+
+                        if( fluxes[ ni ] + mload[ ni ] >= 0 ) continue; // skip neighbour that will recv
+
+                        double dist = samurai::getDistance<Mesh_t::dim, fdist>( cell.center(), barycenter_neighbours[ ni ] );
+
+                        // logs << fmt::format("\t\t\t> Dist to neighbour {} : {}", neighbour_rank, dist ) << std::endl;
+
+                        if( dist < winner_dist && ncells_interface[ ni ] > 0 ){
+                            winner_id   = static_cast<int>( ni );
+                            winner_dist = dist;
+                        }
+                        
+                    }
+
+                    if( winner_id >= 0 ){
+                        assert( winner_id < neighbourhood.size() );
+                        flags[ cell ] = neighbourhood[ static_cast<size_t>( winner_id ) ].rank;
+
+                        mload[ neighbourhood[ winner_id ].rank ] ++;
+                    }
+
+                });
+
+                for( std::size_t ni=0; ni<n_neighbours; ++ni ){
+                    if( fluxes[ ni ] + mload[ ni ] < 0 ){
+                        std::cerr << "Neighbour # " <<  neighbourhood[ ni ].rank << " not full-filled" << std::endl;
+                    }
+                }
+
+                return flags;
+            }
+            
+    };
+
+}
+#endif
diff --git a/include/samurai/load_balancing_life.hpp b/include/samurai/load_balancing_life.hpp
new file mode 100644
index 000000000..b9437c2e8
--- /dev/null
+++ b/include/samurai/load_balancing_life.hpp
@@ -0,0 +1,84 @@
+#pragma once
+
+#include <map>
+#include "load_balancing.hpp"
+#include <samurai/field.hpp>
+
+// for std::sort
+#include <algorithm>
+#ifdef SAMURAI_WITH_MPI
+namespace Load_balancing{
+
+    class Life : public samurai::LoadBalancer<Life> {
+
+        private:
+
+            int _ndomains;
+            int _rank;
+
+        public:
+
+            Life() {
+
+    #ifdef SAMURAI_WITH_MPI
+                boost::mpi::communicator world;
+                _ndomains = world.size();
+                _rank     = world.rank();
+    #else
+                _ndomains = 1;
+                _rank     = 0;
+    #endif
+            }
+
+            inline std::string getName() const { return "life"; }
+
+            template<class Mesh_t>
+            auto reordering_impl( Mesh_t & mesh ) {
+                auto flags = samurai::make_field<int, 1>("ordering_flag", mesh);
+                flags.fill( _rank );
+
+                return flags;
+            }
+
+            template<class Mesh_t>
+            auto load_balance_impl( Mesh_t & mesh ){
+
+                using mpi_subdomain_t = typename Mesh_t::mpi_subdomain_t;
+                // using CellList_t      = typename Mesh_t::cl_type;
+                // using mesh_id_t       = typename Mesh_t::mesh_id_t;
+
+                // using Coord_t = xt::xtensor_fixed<double, xt::xshape<Mesh_t::dim>>;
+                // using Stencil = xt::xtensor_fixed<int, xt::xshape<Mesh_t::dim>>;
+
+                boost::mpi::communicator world;
+
+                // For debug
+                // std::ofstream logs; 
+                // logs.open( fmt::format("log_{}.dat", world.rank()), std::ofstream::app );
+                logs << fmt::format("> New load-balancing using {} ", getName() ) << std::endl;
+
+                auto flags = samurai::make_field<int, 1>("balancing_flags", mesh);
+                flags.fill( _rank );
+                
+                // neighbourhood
+                std::vector<mpi_subdomain_t> & neighbourhood = mesh.mpi_neighbourhood();
+
+                // fluxes to each neighbour
+                std::vector<int> fluxes = samurai::cmptFluxes<samurai::BalanceElement_t::CELL>( mesh, 5 );
+
+                // cpy that can be modified
+                std::vector<int> new_fluxes( fluxes );
+
+                // Loads of each processes
+                std::vector<int> loads;
+                int my_load = static_cast<int>( samurai::cmptLoad<samurai::BalanceElement_t::CELL>( mesh ) );
+                boost::mpi::all_gather( world, my_load, loads );
+
+                // samurai::cellExists();
+
+                return flags;
+            }
+
+    };
+}
+#endif
diff --git a/include/samurai/load_balancing_metis.hpp b/include/samurai/load_balancing_metis.hpp
new file mode 100644
index 000000000..072a376de
--- /dev/null
+++ b/include/samurai/load_balancing_metis.hpp
@@ -0,0 +1,121 @@
+// Copyright 2018-2024 the samurai's authors
+// SPDX-License-Identifier:  BSD-3-Clause
+
+#pragma once
+
+#include <parmetis.h>
+
+#include "field.hpp"
+#include "load_balancing_utils.hpp"
+
+namespace samurai::Load_balancing
+{
+    class Metis : public samurai::LoadBalancer<Metis>
+    {
+      private:
+
+        int _ndomains;
+        int _rank;
+
+        template <class Mesh_t, class Stencil, class Field_t>
+        void propagate(const Mesh_t&, const Stencil&, Field_t&, int, int&) const
+        {
+        }
+
+      public:
+
+        Metis()
+        {
+#ifdef SAMURAI_WITH_MPI
+            boost::mpi::communicator world;
+            _ndomains = world.size();
+            _rank     = world.rank();
+#else
+            _ndomains = 1;
+            _rank     = 0;
+#endif
+        }
+
+        inline std::string getName() const
+        {
+            return "metis";
+        }
+
+        template <class Mesh_t>
+        Mesh_t load_balance_impl(Mesh_t& mesh)
+        {
+            constexpr std::size_t dim = Mesh_t::dim;
+            using mesh_id_t           = typename Mesh_t::mesh_id_t;
+            auto graph                = build_graph(mesh);
+
+            boost::mpi::communicator world;
+
+            idx_t ndims   = dim;
+            idx_t wgtflag = 1;
+            idx_t numflag = 0;
+            idx_t ncon    = 1;
+            idx_t nparts  = world.size();
+            std::vector<real_t> tpwgts(nparts, 1.0 / nparts);
+            real_t ubvec[]    = {1.05};
+            idx_t options[10] = {0};
+            idx_t edgecut     = 0;
+            std::vector<idx_t> part(mesh[mesh_id_t::cells].nb_cells(), 17);
+            MPI_Comm mpi_comm = MPI_COMM_WORLD;
+
+            // ParMETIS_V3_PartKway(global_index_offset.data(),
+            //                      xadj.data(),
+            //                      adjncy.data(),
+            //                      NULL,
+            //                      NULL, // adjwgt.data(),
+            //                      &wgtflag,
+            //                      &numflag,
+            //                      &ncon,
+            //                      &nparts,
+            //                      tpwgts.data(),
+            //                      ubvec,
+            //                      options,
+            //                      &edgecut,
+            //                      part.data(),
+            //                      &mpi_comm);
+
+            ParMETIS_V3_PartGeomKway(graph.global_index_offset.data(),
+                                     graph.xadj.data(),
+                                     graph.adjncy.data(),
+                                     NULL,
+                                     graph.adjwgt.data(),
+                                     &wgtflag,
+                                     &numflag,
+                                     &ndims,
+                                     graph.xyz.data(),
+                                     &ncon,
+                                     &nparts,
+                                     tpwgts.data(),
+                                     ubvec,
+                                     options,
+                                     &edgecut,
+                                     part.data(),
+                                     &mpi_comm);
+
+            // ParMETIS_V3_AdaptiveRepart
+
+            /* ---------------------------------------------------------------------------------------------------------- */
+            /* ------- Construct new mesh for current process ----------------------------------------------------------- */
+            /* ---------------------------------------------------------------------------------------------------------- */
+
+            auto partition_field = make_field<idx_t, 1>("partition", mesh);
+            std::size_t ipart    = 0;
+            for_each_cell(mesh,
+                          [&](const auto& cell)
+                          {
+                              partition_field[cell] = part[ipart++];
+                          });
+            // std::cout << "coucou" << std::endl;
+            save("metis_partition", mesh, partition_field);
+            // std::cout << "coucou 2" << std::endl;
+
+            // Mesh_t new_mesh(new_cl, mesh);
+
+            return mesh;
+        }
+    };
+}
diff --git a/include/samurai/load_balancing_scotch.hpp b/include/samurai/load_balancing_scotch.hpp
new file mode 100644
index 000000000..83f36c6cb
--- /dev/null
+++ b/include/samurai/load_balancing_scotch.hpp
@@ -0,0 +1,126 @@
+// Copyright 2018-2024 the samurai's authors
+// SPDX-License-Identifier:  BSD-3-Clause
+
+#pragma once
+
+#include "ptscotch.h"
+
+#include "field.hpp"
+#include "load_balancing.hpp"
+#include "load_balancing_utils.hpp"
+#include "mesh_interval.hpp"
+#include "stencil.hpp"
+
+using namespace xt::placeholders;
+
+namespace samurai::Load_balancing
+{
+    class Scotch : public samurai::LoadBalancer<Scotch>
+    {
+      private:
+
+        int _ndomains;
+        int _rank;
+
+        template <class Mesh_t, class Stencil, class Field_t>
+        void propagate(const Mesh_t&, const Stencil&, Field_t&, int, int&) const
+        {
+        }
+
+      public:
+
+        Scotch()
+        {
+#ifdef SAMURAI_WITH_MPI
+            boost::mpi::communicator world;
+            _ndomains = world.size();
+            _rank     = world.rank();
+#else
+            _ndomains = 1;
+            _rank     = 0;
+#endif
+        }
+
+        inline std::string getName() const
+        {
+            return "Scotch";
+        }
+
+        template <class Mesh_t>
+        Mesh_t& reordering_impl(Mesh_t& mesh)
+        {
+            return mesh;
+        }
+
+        template <class Mesh_t>
+        Mesh_t load_balance_impl(Mesh_t& mesh)
+        {
+            constexpr std::size_t dim = Mesh_t::dim;
+            using mesh_id_t           = typename Mesh_t::mesh_id_t;
+            auto graph                = build_graph(mesh);
+
+            boost::mpi::communicator world;
+            SCOTCH_Dgraph grafdat;
+            SCOTCH_dgraphInit(&grafdat, MPI_COMM_WORLD);
+
+            // std::cout << "xadj: " << xt::adapt(graph.xadj) << std::endl;
+            // std::cout << "adjnct: " << xt::adapt(graph.adjncy) << std::endl;
+            SCOTCH_dgraphBuild(&grafdat,              // grafdat
+                               0,                     // baseval, c-style numbering
+                               graph.xadj.size() - 1, // vertlocnbr, nCells
+                               graph.xadj.size() - 1, // vertlocmax
+                               const_cast<SCOTCH_Num*>(graph.xadj.data()),
+                               nullptr,
+
+                               nullptr, // veloloctab, vtx weights
+                               nullptr, // vlblloctab
+
+                               graph.adjncy.size(),                          // edgelocnbr, number of arcs
+                               graph.adjncy.size(),                          // edgelocsiz
+                               const_cast<SCOTCH_Num*>(graph.adjncy.data()), // edgeloctab
+                               nullptr,                                      // edgegsttab
+                               nullptr                                       // edlotab, edge weights
+            );
+            SCOTCH_dgraphCheck(&grafdat);
+            SCOTCH_Strat strategy;
+            SCOTCH_stratInit(&strategy);
+
+            SCOTCH_Arch archdat;
+            SCOTCH_archInit(&archdat);
+            // SCOTCH_archCmplt(&archdat, 1);
+
+            std::vector<SCOTCH_Num> part(mesh[mesh_id_t::cells].nb_cells(), 17);
+
+            SCOTCH_Dmapping mappdat;
+            SCOTCH_dgraphMapInit(&grafdat, &mappdat, &archdat, part.data());
+
+            SCOTCH_dgraphPart(&grafdat,
+                              2,
+                              &strategy,  // const SCOTCH_Strat *
+                              part.data() // parttab
+            );
+            SCOTCH_archExit(&archdat);
+            SCOTCH_stratExit(&strategy);
+            SCOTCH_dgraphExit(&grafdat);
+
+            /* ---------------------------------------------------------------------------------------------------------- */
+            /* ------- Construct new mesh for current process ----------------------------------------------------------- */
+            /* ---------------------------------------------------------------------------------------------------------- */
+
+            auto partition_field = make_field<std::size_t, 1>("partition", mesh);
+            std::size_t ipart    = 0;
+            for_each_cell(mesh[mesh_id_t::cells],
+                          [&](const auto& cell)
+                          {
+                              partition_field[cell] = part[ipart++];
+                          });
+            // std::cout << "coucou" << std::endl;
+            save("metis_partition", mesh, partition_field);
+            // std::cout << "coucou 2" << std::endl;
+
+            // Mesh_t new_mesh(new_cl, mesh);
+
+            return mesh;
+        }
+    };
+}
diff --git a/include/samurai/load_balancing_sfc.hpp b/include/samurai/load_balancing_sfc.hpp
new file mode 100644
index 000000000..a1ed85b4f
--- /dev/null
+++ b/include/samurai/load_balancing_sfc.hpp
@@ -0,0 +1,284 @@
+#pragma once
+
+#include "assertLogTrace.hpp"
+
+#include "load_balancing.hpp"
+
+#ifdef SAMURAI_WITH_MPI
+template <int dim, class SFC_type_t>
+class SFC_LoadBalancer_interval : public samurai::LoadBalancer<SFC_LoadBalancer_interval<dim, SFC_type_t>>
+{
+  private:
+
+    SFC_type_t _sfc;
+    int _ndomains;
+    int _rank;
+
+    const double _unbalance_threshold = 0.05; // 5 %
+
+  public:
+
+    using samurai::LoadBalancer<SFC_LoadBalancer_interval<dim, SFC_type_t>>::logs;
+
+    SFC_LoadBalancer_interval()
+    {
+#ifdef SAMURAI_WITH_MPI
+        boost::mpi::communicator world;
+        _ndomains = world.size();
+        _rank     = world.rank();
+#else
+        _ndomains = 1;
+        _rank     = 0;
+#endif
+    }
+
+    inline std::string getName() const
+    {
+        return "SFC_" + _sfc.getName() + "_LB";
+    }
+
+    /**
+    * Re-order cells on MPI processes based on the given SFC curve. This need to be
+    * called once at the beginning (unless ordering is fixed in partition mesh).
+    * After, load balancing will exchange data based on SFC order and thus only
+    * boundaries will be moved.
+    *
+    */
+    template<class Mesh_t>
+    auto reordering_impl( Mesh_t & mesh ) {
+
+        boost::mpi::communicator world;
+
+        // For debug
+        // std::ofstream logs;
+        // logs.open("log_" + std::to_string(_rank) + ".dat", std::ofstream::app);
+        logs << "# [SFC_LoadBalancer_interval::Morton] Reordering cells using SFC" << std::endl;
+
+        // SFC 1D key for cells
+        auto sfc_keys = samurai::make_field<SFC_key_t, 1>( "keys", mesh );
+        sfc_keys.fill( 0 );
+
+        auto flags = samurai::make_field<int, 1>("rank", mesh);
+        flags.fill( world.rank() );
+         
+        logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl;
+
+        SFC_key_t mink = std::numeric_limits<SFC_key_t>::max(), maxk = std::numeric_limits<SFC_key_t>::min();
+
+        samurai::for_each_cell(mesh[Mesh_t::mesh_id_t::cells], [&]( const auto & cell ) {
+
+            // this is where things can get nasty, we expect indices to be positive values !!
+            xt::xtensor_fixed<uint32_t, xt::xshape<Mesh_t::dim>> ijk;
+            for (size_t idim = 0; idim < dim; ++idim) {
+
+                // FIX need shift to get only positive index
+                assert( cell.indices( idim ) >= 0 );
+                ijk( idim ) = static_cast<uint32_t>( cell.indices( idim ) ) << ( mesh.max_level() - cell.level );
+            }
+
+            auto key = _sfc.template getKey<dim>( ijk );
+
+            sfc_keys[ cell ] = key;
+            
+            mink = std::min( key, mink );
+            maxk = std::max( key, maxk );
+
+        });
+
+        // Key boundaries of current process - unused for now
+        std::vector<SFC_key_t> bounds = { mink, maxk };
+        logs << "\t\t> Local key bounds [" << bounds[ 0 ] << ", " << bounds[ 1 ] << "]" << std::endl;
+
+        std::vector<SFC_key_t> boundaries;
+        boost::mpi::all_gather( world, bounds.data(), static_cast<int>( bounds.size() ), boundaries );
+
+        logs << "\t\t> Global key boundaries [";
+        for(const auto & ik : boundaries )
+            logs << ik << ",";
+        logs << "]" << std::endl;
+
+        // Check overlap with previous/next process. Does not mean that there is no overlap, but at least between "adjacent"
+        // (MPI-1), (MPI+1) there is not overlap found
+        std::vector<SFC_key_t> boundaries_new( static_cast<size_t>( world.size() + 1 ) );
+
+        // find max value for boundaries
+        SFC_key_t globalMax = boundaries[ 0 ];
+        for(size_t ip=0; ip<boundaries.size(); ++ip){
+            globalMax = std::max( boundaries[ ip ], globalMax );
+        }
+
+        // cmpt max theoretical key
+        globalMax += 1;
+
+        // evenly spaced intervals
+        SFC_key_t ds = globalMax / static_cast<size_t>( world.size() );
+        boundaries_new[ 0 ] = 0;
+        for(size_t ip=1; ip<boundaries_new.size(); ++ip){
+            boundaries_new[ ip ] = boundaries_new[ ip - 1 ] + ds;
+        }
+
+        logs << "\t\t> Global key evenly distrib boundaries [";
+        for(const auto & ik : boundaries_new )
+            logs << ik << ",";
+        logs << "]" << std::endl;
+
+        // distribute cell based on boundaries & sfc key
+        std::map<int, bool> comm;
+        samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ) {
+            auto key = sfc_keys[ cell ];
+
+            // optimize using bisect - find proc that should have this cell
+            for( size_t ip=0; ip< static_cast<size_t>( world.size() ); ++ip ){
+                if( key >= boundaries_new[ ip ] &&  key < boundaries_new[ ip + 1 ] ){
+                    flags[ cell ] = static_cast<int>( ip );
+
+                    // unique list of process that should be contacted
+                    if( comm.find( static_cast<int>( ip ) ) == comm.end() ){
+                        comm[ static_cast<int>( ip ) ] = true;
+                    }
+
+                    break;
+                }
+            }
+
+        });
+
+        return flags;
+    }
+
+    template <class Mesh_t>
+    bool require_balance_impl( Mesh_t & mesh ) {
+
+        boost::mpi::communicator world;
+
+        logs << fmt::format("\n# [SFC_LoadBalancer_interval::Morton] required_balance_impl ") << std::endl;
+
+        double nbCells_tot = 0;
+        std::vector<double> nbCellsPerProc;
+        boost::mpi::all_gather( world, static_cast<double>( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) ), nbCellsPerProc );
+
+        for(size_t ip=0; ip<nbCellsPerProc.size(); ++ip ) {
+            nbCells_tot += nbCellsPerProc[ ip ];
+        }
+
+        // no weight while computing load 
+        double dc = nbCells_tot  / static_cast<double> ( world.size() );
+
+        for(size_t ip=0; ip<nbCellsPerProc.size(); ++ip ) {
+            double diff = std::abs( nbCellsPerProc[ ip ] - dc ) / dc;
+
+            if( diff > _unbalance_threshold ) return true;
+        }
+
+        return false;
+    }
+
+    template <class Mesh_t>
+    auto load_balance_impl( Mesh_t & mesh )
+    {
+
+        boost::mpi::communicator world;
+        
+        // std::ofstream logs;
+        // logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app );
+        logs << fmt::format("\n# [SFC_LoadBalancer_interval::Morton] Load balancing cells ") << std::endl;
+
+        // SFC 1D key for cells
+        std::map<SFC_key_t, typename Mesh_t::cell_t> sfc_map;
+        auto sfc_keys = samurai::make_field<SFC_key_t, 1>( "keys", mesh );
+        sfc_keys.fill( 0 );
+
+        auto flags = samurai::make_field<int, 1>("rank", mesh);
+        flags.fill( world.rank() );
+         
+        logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl;
+
+        SFC_key_t mink = std::numeric_limits<SFC_key_t>::max(), maxk = std::numeric_limits<SFC_key_t>::min();
+        samurai::for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto & cell ) {
+
+            // this is where things can get nasty, we expect indices to be positive values !!
+            xt::xtensor_fixed<uint32_t, xt::xshape<Mesh_t::dim>> ijk;
+            for (size_t idim = 0; idim < Mesh_t::dim; ++idim) {
+
+                // FIX need shift to get only positive index
+                assert( cell.indices( idim ) >= 0 );
+                ijk( idim ) = static_cast<uint32_t>( cell.indices( idim ) ) << ( mesh.max_level() - cell.level );
+            }
+
+            auto key = _sfc.template getKey<dim>( ijk );
+
+            sfc_keys[ cell ] = key;
+
+            if( sfc_map.find( key ) != sfc_map.end() ) {
+                assert( false );
+                std::cerr << fmt::format("Rank # {}, Error computing SFC, index not uniq ! ", world.rank()) << std::endl;
+            }
+
+            sfc_map[ key ] = cell;
+
+            mink = std::min( mink, key );
+            maxk = std::max( maxk, key ); 
+
+        });
+
+        assert( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) == sfc_map.size() );
+
+        size_t ncells_tot = 0, dc = 0;
+        std::vector<size_t> nbCellsPerProc;
+        std::vector<size_t> globIdx( static_cast<size_t>( world.size() + 1 ) );
+        std::vector<size_t> globIdxNew( static_cast<size_t>( world.size() + 1 ) );
+        boost::mpi::all_gather( world, mesh.nb_cells( Mesh_t::mesh_id_t::cells ), nbCellsPerProc );
+
+        logs << "\t\t> Number of cells : " << mesh.nb_cells( Mesh_t::mesh_id_t::cells ) << std::endl;
+
+        for(size_t i=0; i<static_cast<size_t>( world.size() ); ++i){
+            globIdx[ i + 1 ] = globIdx[ i ] + nbCellsPerProc[ i ];
+            ncells_tot += nbCellsPerProc[ i ];
+        }
+        dc = ncells_tot / static_cast<size_t>( world.size() );
+
+        // load balanced globIdx -> new theoretical key boundaries based on number of cells per proc
+        globIdxNew[ 0 ] = 0;
+        for(size_t i=0; i<static_cast<size_t>( world.size() ); ++i){
+            globIdxNew[ i + 1 ] = globIdxNew[ i ] + dc;
+        }
+
+        {
+            logs << "\t\t> GlobalIdx : ";
+            for(const auto & i : globIdx )
+                logs << i << ", ";
+            logs << std::endl;
+
+            logs << "\t\t> GlobalIdx balanced : ";
+            for(const auto & i : globIdxNew )
+                logs << i << ", ";
+            logs << std::endl;
+            
+        }   
+
+        size_t start = 0;
+        while( globIdx[ static_cast<size_t>( world.rank() ) ] >= ( start + 1 ) * dc ){
+            start ++;
+        }
+
+        logs << "\t\t> Start @ rank " << start << std::endl;
+
+        size_t count = globIdx[ static_cast<size_t>( world.rank() ) ];
+        for( auto & it : sfc_map ) {
+
+            if( count >= ( start + 1 ) * dc ){
+                start ++;
+                start = std::min( static_cast<size_t>( world.size() - 1 ) , start );
+                logs << "\t\t> Incrementing Start @ rank " << start << ", count " << count << std::endl;
+            }
+            
+            flags[ it.second ] = static_cast<int>( start );
+            
+            count ++;
+        }
+
+        return flags;
+    }
+
+};
+#endif
diff --git a/include/samurai/load_balancing_sfc_w.hpp b/include/samurai/load_balancing_sfc_w.hpp
new file mode 100644
index 000000000..5b6780e82
--- /dev/null
+++ b/include/samurai/load_balancing_sfc_w.hpp
@@ -0,0 +1,348 @@
+/**
+ *
+ * This class implements SFC-based load balancing. The difference between this class
+ * and the one in the load_balancing_sfc.hpp is that, here we try to use level based "weight"
+ * to dispatch cell between processes in addition to the 1D-SFC key.
+ *
+ * This is a try to fix the unbalanced due to the difference of load induced by the difference of
+ * level between two cells. With the traditional (load_balancing_sfc.hpp) strategy even if the
+ * load, in term of number of cells on a process, is good, unbalanced on computational time may
+ * appear.
+ *
+ */
+#pragma once
+
+#include "assertLogTrace.hpp"
+
+#include "load_balancing.hpp"
+#ifdef SAMURAI_WITH_MPI
+namespace Load_balancing{ 
+
+    template <int dim, class SFC_type_t>
+    class SFCw : public samurai::LoadBalancer<SFCw<dim, SFC_type_t>>
+    {
+    private:
+
+        SFC_type_t _sfc;
+        int _ndomains;
+        int _rank;
+
+        const double _unbalance_threshold = 0.05; // 5 %
+
+    public:
+
+        using samurai::LoadBalancer<SFCw<dim, SFC_type_t>>::logs;
+
+        SFCw()
+        {
+#ifdef SAMURAI_WITH_MPI
+            boost::mpi::communicator world;
+            _ndomains = world.size();
+            _rank     = world.rank();
+#else
+            _ndomains = 1;
+            _rank     = 0;
+#endif
+        }
+
+        inline std::string getName() const
+        {
+            return "SFCw_" + _sfc.getName() + "_LB";
+        }
+
+        /**
+        * Compute weights for cell at each level. We expect that small cell (high level)
+        * cost more computational power that largest cell (low level). But this is impacted
+        * by numerical scheme used. As default value, we use power of two starting from levelmin
+        * (low level).
+        */
+        std::vector<double> getWeights( size_t levelmin, size_t levelmax ) {
+            // Computing weights based on maxlevel
+            std::vector<double> weights( levelmax + 1 );
+            
+            for( size_t ilvl=levelmin; ilvl<=levelmax; ++ilvl ){
+                // weights[ ilvl ] = ( 1 << ( ilvl  - levelmin ) );       // prioritize small cell
+                weights[ ilvl ] = 1 << (ilvl * ilvl);
+                // weights[ ilvl ] = 1 << ( levelmax - ilvl ) ; // prioritize large cell
+                // weights[ ilvl ] = 1.;                                // all equal
+                logs << fmt::format("\t\t> Level {}, weight for cell : {}", ilvl, weights[ ilvl ] ) << std::endl;
+            }
+
+            return weights;
+        }
+
+        template <class Mesh_t>
+        bool require_balance_impl( Mesh_t & mesh ) {
+
+            boost::mpi::communicator world;
+
+            logs << fmt::format("\n# [SFCw_LoadBalancer::Morton] required_balance_impl ") << std::endl;
+
+            std::vector<double> weights = getWeights( mesh.min_level(), mesh.max_level() );
+
+            double load = 0;
+            samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ){
+                load +=  weights[ cell.level ];
+            });
+
+            std::vector<double> nbLoadPerProc;
+            boost::mpi::all_gather( world, load, nbLoadPerProc );
+            
+            double load_tot = 0.;
+            for(size_t i=0; i<nbLoadPerProc.size(); ++i){
+                load_tot += nbLoadPerProc[ i ];
+            }
+
+            double dc = load_tot / static_cast<double>( world.size() );
+
+            for(size_t ip=0; ip<nbLoadPerProc.size(); ++ip ) {
+                double diff = std::abs( nbLoadPerProc[ ip ] - dc ) / dc;
+
+                if( diff > _unbalance_threshold ) return true;
+            }
+
+            return false;
+        }
+
+        /**
+        * Re-order cells on MPI processes based on the given SFC curve. This need to be
+        * called once at the beginning (unless ordering is fixed in partition mesh).
+        * After, load balancing will exchange data based on SFC order and thus only
+        * boundaries will be moved.
+        *
+        */
+        template<class Mesh_t>
+        auto reordering_impl( Mesh_t & mesh ) {
+
+            boost::mpi::communicator world;
+
+            // For debug
+            // std::ofstream logs;
+            // logs.open("log_" + std::to_string(_rank) + ".dat", std::ofstream::app);
+            logs << "# [SFCw_LoadBalancer::Morton] Reordering cells using SFC" << std::endl;
+
+            // SFC 1D key for cells
+            auto sfc_keys = samurai::make_field<SFC_key_t, 1>( "keys", mesh );
+            sfc_keys.fill( 0 );
+
+            auto flags = samurai::make_field<int, 1>("rank", mesh);
+            flags.fill( world.rank() );
+            
+            logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl;
+
+            SFC_key_t mink = std::numeric_limits<SFC_key_t>::max(), maxk = std::numeric_limits<SFC_key_t>::min();
+
+            samurai::for_each_cell(mesh[Mesh_t::mesh_id_t::cells], [&]( const auto & cell ) {
+
+                // this is where things can get nasty, we expect indices to be positive values !!
+                xt::xtensor_fixed<uint32_t, xt::xshape<Mesh_t::dim>> ijk;
+                for (size_t idim = 0; idim < dim; ++idim) {
+
+                    // FIX need shift to get only positive index
+                    assert( cell.indices( idim ) >= 0 );
+                    ijk( idim ) = static_cast<uint32_t>( cell.indices( idim ) ) << ( mesh.max_level() - cell.level );
+                }
+
+                auto key = _sfc.template getKey<dim>( ijk );
+
+                sfc_keys[ cell ] = key;
+                
+                mink = std::min( key, mink );
+                maxk = std::max( key, maxk );
+
+            });
+
+            // Key boundaries of current process - unused for now
+            std::vector<SFC_key_t> bounds = { mink, maxk };
+            logs << "\t\t> Local key bounds [" << bounds[ 0 ] << ", " << bounds[ 1 ] << "]" << std::endl;
+
+            std::vector<SFC_key_t> boundaries;
+            boost::mpi::all_gather( world, bounds.data(), static_cast<int>( bounds.size() ), boundaries );
+
+            logs << "\t\t> Global key boundaries [";
+            for(const auto & ik : boundaries )
+                logs << ik << ",";
+            logs << "]" << std::endl;
+
+            // Check overlap with previous/next process. Does not mean that there is no overlap, but at least between "adjacent"
+            // (MPI-1), (MPI+1) there is not overlap found
+            std::vector<SFC_key_t> boundaries_new( static_cast<size_t>( world.size() + 1 ) );
+
+            // find max value for boundaries
+            SFC_key_t globalMax = boundaries[ 0 ];
+            for(size_t ip=0; ip<boundaries.size(); ++ip){
+                globalMax = std::max( boundaries[ ip ], globalMax );
+            }
+
+            // cmpt max theoretical key
+            globalMax += 1;
+
+            // evenly spaced intervals
+            SFC_key_t ds = globalMax / static_cast<size_t>( world.size() );
+            boundaries_new[ 0 ] = 0;
+            for(size_t ip=1; ip<boundaries_new.size(); ++ip){
+                boundaries_new[ ip ] = boundaries_new[ ip - 1 ] + ds;
+            }
+
+            logs << "\t\t> Global key evenly distrib boundaries [";
+            for(const auto & ik : boundaries_new )
+                logs << ik << ",";
+            logs << "]" << std::endl;
+
+            // distribute cell based on boundaries & sfc key
+            std::map<int, bool> comm;
+            samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ) {
+                auto key = sfc_keys[ cell ];
+
+                // optimize using bisect - find proc that should have this cell
+                for( size_t ip=0; ip< static_cast<size_t>( world.size() ); ++ip ){
+                    if( key >= boundaries_new[ ip ] &&  key < boundaries_new[ ip + 1 ] ){
+                        flags[ cell ] = static_cast<int>( ip );
+
+                        // unique list of process that should be contacted
+                        if( comm.find( static_cast<int>( ip ) ) == comm.end() ){
+                            comm[ static_cast<int>( ip ) ] = true;
+                        }
+
+                        break;
+                    }
+                }
+
+            });
+
+            return flags;
+        }
+
+        template <class Mesh_t>
+        auto load_balance_impl( Mesh_t & mesh )
+        {
+
+            boost::mpi::communicator world;
+            
+            // std::ofstream logs;
+            // logs.open( "log_" + std::to_string( world.rank() ) + ".dat", std::ofstream::app );
+            logs << fmt::format("\n# [SFCw_LoadBalancer::Morton] Load balancing cells ") << std::endl;
+
+            // SFC 1D key for each cell
+            std::map<SFC_key_t, typename Mesh_t::cell_t> sfc_map;
+            auto sfc_keys = samurai::make_field<SFC_key_t, 1>( "keys", mesh );
+            sfc_keys.fill( 0 );
+
+            // MPI destination rank of process for each cell
+            auto flags = samurai::make_field<int, 1>("rank", mesh);
+            flags.fill( world.rank() );
+
+            // Computing weights based on maxlevel
+            // std::vector<double> weights( mesh.max_level() + 1 );
+            // for( size_t ilvl=mesh.min_level(); ilvl<=mesh.max_level(); ++ilvl ){
+            //     weights[ ilvl ] = 1 << ( ( ilvl  - mesh.min_level() ) ) ;
+            //     // weights[ ilvl ] = 1 << ( mesh.max_level() - ilvl ) ;
+            //     // weights[ ilvl ] = 1.;
+            //     logs << fmt::format("\t\t> Level {}, weight for cell : {}", ilvl, weights[ ilvl ] ) << std::endl;
+            // }
+
+            std::vector<double> weights = getWeights( mesh.min_level(), mesh.max_level() );
+            
+            
+            logs << fmt::format("\t> Computing SFC ({}) 1D indices ( cell ) ... ", _sfc.getName() ) << std::endl;
+
+            // SFC_key_t mink = std::numeric_limits<SFC_key_t>::max(), maxk = std::numeric_limits<SFC_key_t>::min();
+
+            // compute SFC key for each cell
+            samurai::for_each_cell( mesh[ Mesh_t::mesh_id_t::cells ], [&]( const auto & cell ) {
+
+                // this is where things can get nasty, we expect indices to be positive values !!
+                xt::xtensor_fixed<uint32_t, xt::xshape<Mesh_t::dim>> ijk;
+                for (size_t idim = 0; idim < Mesh_t::dim; ++idim) {
+
+                    // FIX need shift to get only positive index
+                    assert( cell.indices( idim ) >= 0 );
+                    ijk( idim ) = static_cast<uint32_t>( cell.indices( idim ) ) << ( mesh.max_level() - cell.level );
+                }
+
+                auto key = _sfc.template getKey<dim>( ijk );
+
+                sfc_keys[ cell ] = key;
+
+                if( sfc_map.find( key ) != sfc_map.end() ) {
+                    assert( false );
+                    std::cerr << fmt::format("Rank # {}, Error computing SFC, index not uniq ! ", world.rank()) << std::endl;
+                }
+
+                sfc_map[ key ] = cell;
+
+                // mink = std::min( mink, key );
+                // maxk = std::max( maxk, key ); 
+
+            });
+
+            assert( mesh.nb_cells( Mesh_t::mesh_id_t::cells ) == sfc_map.size() );
+
+            size_t nload_tot = 0, dc = 0;
+            std::vector<size_t> nbLoadPerProc;
+            std::vector<size_t> globIdx( static_cast<size_t>( world.size() + 1 ) );
+            std::vector<size_t> globIdxNew( static_cast<size_t>( world.size() + 1 ) );
+
+            // get load for each MPI process
+            size_t load = 0;
+            samurai::for_each_cell( mesh[Mesh_t::mesh_id_t::cells], [&](const auto & cell ){
+                load +=  weights[ cell.level ];
+            });
+
+            boost::mpi::all_gather( world, load, nbLoadPerProc );
+
+            for(size_t i=0; i<static_cast<size_t>( world.size() ); ++i){
+                globIdx[ i + 1 ] = globIdx[ i ] + nbLoadPerProc[ i ];
+                nload_tot += nbLoadPerProc[ i ];
+            }
+            dc = nload_tot / static_cast<size_t>( world.size() );
+
+            logs << fmt::format("\t\t> Load of cells (weighted) : {}, dc : {}", load, dc) << std::endl;
+
+            // load balanced globIdx -> new theoretical key boundaries based on number of cells per proc
+            globIdxNew[ 0 ] = 0;
+            for(size_t i=0; i<static_cast<size_t>( world.size() ); ++i){
+                globIdxNew[ i + 1 ] = globIdxNew[ i ] + dc;
+            }
+
+            {
+                logs << "\t\t> GlobalIdx : ";
+                for(const auto & i : globIdx )
+                    logs << i << ", ";
+                logs << std::endl;
+
+                logs << "\t\t> GlobalIdx balanced : ";
+                for(const auto & i : globIdxNew )
+                    logs << i << ", ";
+                logs << std::endl;
+                
+            }   
+
+            size_t start = 0;
+            while( globIdx[ static_cast<size_t>( world.rank() ) ] >= ( start + 1 ) * dc ){
+                start ++;
+            }
+
+            logs << "\t\t> Start @ rank " << start << std::endl;
+
+            size_t count = globIdx[ static_cast<size_t>( world.rank() ) ];
+            for( auto & it : sfc_map ) {
+
+                if( count >= ( start + 1 ) * dc ){
+                    start ++;
+                    start = std::min( static_cast<size_t>( world.size() - 1 ) , start );
+                    logs << "\t\t> Incrementing Start @ rank " << start << ", count " << count << std::endl;
+                }
+                
+                flags[ it.second ] = static_cast<int>( start );
+                
+                count += weights[ it.second.level ];
+            }
+
+            return flags;
+        }
+
+    };
+
+}
+#endif
diff --git a/include/samurai/load_balancing_utils.hpp b/include/samurai/load_balancing_utils.hpp
new file mode 100644
index 000000000..0af4acba3
--- /dev/null
+++ b/include/samurai/load_balancing_utils.hpp
@@ -0,0 +1,267 @@
+// Copyright 2018-2024 the samurai's authors
+// SPDX-License-Identifier:  BSD-3-Clause
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <xtensor/xslice.hpp>
+
+#include "ptscotch.h"
+
+#include "mesh_interval.hpp"
+#include "stencil.hpp"
+
+using namespace xt::placeholders;
+
+namespace samurai
+{
+    namespace detail
+    {
+        template <class Mesh, class Func>
+        inline void for_each_interface_impl(const Mesh& mesh1, const Mesh& mesh2, Func&& func)
+        {
+            constexpr std::size_t dim        = Mesh::dim;
+            using interval_t                 = typename Mesh::interval_t;
+            using mesh_interval_t            = MeshInterval<dim, interval_t>;
+            const std::size_t max_level_jump = 1;
+
+            auto directions = cartesian_directions<dim>();
+
+            for (std::size_t l_jump = 0; l_jump <= max_level_jump; ++l_jump)
+            {
+                if (mesh1.max_level() - mesh1.min_level() >= l_jump)
+                {
+                    for (std::size_t level = mesh1.min_level(); level <= mesh1.max_level() - l_jump; ++level)
+                    {
+                        for (std::size_t id = 0; id < directions.shape(0); ++id)
+                        {
+                            auto d   = xt::view(directions, id);
+                            auto set = intersection(mesh1[level], translate(mesh2[level + l_jump], d)).on(level + l_jump);
+                            set(
+                                [&](const auto& interval, const auto& index)
+                                {
+                                    mesh_interval_t mesh_interval_from{level + l_jump, interval - d(0), index - xt::view(d, xt::range(1, _))};
+                                    mesh_interval_t mesh_interval_to{level, interval >> l_jump, index >> l_jump};
+                                    mesh_interval_from.i.step = (1 << l_jump);
+                                    for (int ll = 0; ll < (1 << l_jump); ++ll)
+                                    {
+                                        if (mesh_interval_from.i.start == mesh_interval_from.i.end)
+                                        {
+                                            break;
+                                        }
+                                        func(mesh_interval_from, mesh_interval_to);
+                                        mesh_interval_from.i.start++;
+                                    }
+                                });
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    template <class Mesh, class Func>
+    inline void for_each_inner_interface(Mesh& mesh, Func&& func)
+    {
+        using mesh_id_t = typename Mesh::mesh_id_t;
+
+        detail::for_each_interface_impl(mesh[mesh_id_t::cells], mesh[mesh_id_t::cells], std::forward<Func>(func));
+    }
+
+    template <class Mesh, class Func>
+    inline void for_each_subdomain_interface(Mesh& mesh, Func&& func)
+    {
+        using mesh_id_t = typename Mesh::mesh_id_t;
+
+        for (auto& neigh : mesh.mpi_neighbourhood())
+        {
+            detail::for_each_interface_impl(mesh[mesh_id_t::cells], neigh.mesh[mesh_id_t::cells], std::forward<Func>(func));
+        }
+    }
+
+    template <class Mesh, class MeshInterval>
+    inline auto get_local_interval_location(const Mesh& mesh, const MeshInterval& mesh_interval)
+    {
+        std::size_t level  = mesh_interval.level;
+        auto& i            = mesh_interval.i;
+        auto& index        = mesh_interval.index;
+        std::size_t offset = 0;
+        for (std::size_t ll = 0; ll < level; ++ll)
+        {
+            offset += mesh[ll].nb_intervals(0);
+        }
+        return mesh[level].get_interval_location(i, index) + offset;
+    }
+
+    template <class Mesh, class MeshInterval, class LocalIndex>
+    inline auto get_local_start_interval(const Mesh& mesh, const MeshInterval& mesh_interval, const LocalIndex& local_index)
+    {
+        return local_index[get_local_interval_location(mesh, mesh_interval)] + mesh_interval.i.start;
+    }
+
+    template <class idx_t>
+    struct graph
+    {
+        std::vector<idx_t> global_index_offset;
+        std::vector<idx_t> xadj;
+        std::vector<idx_t> adjncy;
+        std::vector<idx_t> adjwgt;
+        std::vector<float> xyz;
+    };
+
+    template <class Mesh_t>
+    auto build_graph(const Mesh_t& all_meshes)
+    {
+        constexpr std::size_t dim = Mesh_t::dim;
+        using mesh_id_t           = typename Mesh_t::mesh_id_t;
+
+        // graph<idx_t> g;
+        graph<SCOTCH_Num> g;
+
+        boost::mpi::communicator world;
+
+        auto& mesh = all_meshes[mesh_id_t::cells];
+
+        std::vector<std::size_t> global_sizes(world.size());
+        mpi::all_gather(world, mesh.nb_cells(), global_sizes);
+
+        g.global_index_offset.resize(world.size() + 1);
+        for (std::size_t i = 1; i < g.global_index_offset.size(); ++i)
+        {
+            g.global_index_offset[i] = g.global_index_offset[i - 1] + global_sizes[i - 1];
+        }
+
+        // Construct the local index numbering of the cells
+        std::vector<int> local_index(mesh.nb_intervals(0));
+        std::size_t counter      = 0;
+        std::size_t current_size = 0;
+        for_each_interval(mesh,
+                          [&](std::size_t level, const auto& i, auto& index)
+                          {
+                              local_index[counter++] = current_size - i.start;
+                              current_size += i.size();
+                          });
+
+        // Construct the list adjacent cells for each cell
+        std::map<std::size_t, std::vector<std::size_t>> adj_cells;
+        std::map<std::size_t, std::vector<std::size_t>> adj_weights;
+        std::size_t data_size = 0;
+
+        // interior interface
+        for_each_inner_interface(
+            all_meshes,
+            [&](const auto& mesh_interval_from, const auto& mesh_interval_to)
+            {
+                auto start_from = get_local_start_interval(mesh, mesh_interval_from, local_index) + g.global_index_offset[world.rank()];
+                auto start_to   = get_local_start_interval(mesh, mesh_interval_to, local_index) + g.global_index_offset[world.rank()];
+
+                // std::cout << "From: " << mesh_interval_from.level << " " << mesh_interval_from.i << " " << mesh_interval_from.index
+                //           << std::endl;
+                // std::cout << "To: " << mesh_interval_to.level << " " << mesh_interval_to.i << " " << mesh_interval_to.index << std::endl;
+                auto step = mesh_interval_from.i.step;
+                for (int i = 0; i < mesh_interval_to.i.size(); ++i)
+                {
+                    // std::cout << i << " " << step << " " << start_from + i * step << " " << start_to + i << std::endl;
+                    adj_cells[start_from + i * step].push_back(start_to + i);
+                    adj_weights[start_from + i * step].push_back(0.5 * (mesh_interval_from.level + mesh_interval_to.level));
+                    data_size++;
+                    if (mesh_interval_from.level != mesh_interval_to.level)
+                    {
+                        adj_cells[start_to + i].push_back(start_from + i * step);
+                        adj_weights[start_to + i].push_back(0.5 * (mesh_interval_from.level + mesh_interval_to.level));
+                        data_size++;
+                    }
+                }
+                // std::cout << std::endl;
+            });
+
+        std::vector<mpi::request> req;
+        std::vector<std::vector<std::size_t>> to_send(all_meshes.mpi_neighbourhood().size());
+        std::set<int> recv_from;
+
+        std::size_t i_neigh = 0;
+        for (auto& neigh : all_meshes.mpi_neighbourhood())
+        {
+            detail::for_each_interface_impl(mesh,
+                                            neigh.mesh[mesh_id_t::cells],
+                                            [&](const auto&, const auto& mesh_interval)
+                                            {
+                                                recv_from.insert(neigh.rank);
+                                                auto start = get_local_start_interval(mesh, mesh_interval, local_index)
+                                                           + g.global_index_offset[world.rank()];
+                                                to_send[i_neigh].push_back(static_cast<std::size_t>(start));
+                                            });
+            if (recv_from.find(neigh.rank) != recv_from.end())
+            {
+                req.push_back(world.isend(neigh.rank, neigh.rank, to_send[i_neigh++]));
+            }
+        }
+
+        for (auto& neigh : all_meshes.mpi_neighbourhood())
+        {
+            if (recv_from.find(neigh.rank) != recv_from.end())
+            {
+                std::vector<std::size_t> to_recv;
+                world.recv(neigh.rank, world.rank(), to_recv);
+
+                std::size_t to_recv_counter = 0;
+                detail::for_each_interface_impl(
+                    neigh.mesh[mesh_id_t::cells],
+                    mesh,
+                    [&](const auto& mesh_interval, const auto& mesh_interval_to)
+                    {
+                        auto start_1 = get_local_start_interval(mesh, mesh_interval, local_index) + g.global_index_offset[world.rank()];
+                        auto start_2 = to_recv[to_recv_counter++];
+
+                        for (int i = 0, i1 = start_1, i2 = start_2; i < mesh_interval.i.size(); ++i, ++i1, ++i2)
+                        {
+                            adj_cells[i1].push_back(i2);
+                            adj_weights[start_1 + i].push_back(0.5 * (mesh_interval.level + mesh_interval_to.level));
+                            data_size += 1;
+                        }
+                    });
+            }
+        }
+
+        mpi::wait_all(req.begin(), req.end());
+
+        // Construct the adjacency list in CSR format
+        g.xadj.resize(mesh.nb_cells() + 1);
+
+        g.xadj[0]          = 0;
+        std::size_t i_xadj = 0;
+        for (auto& v : adj_cells)
+        {
+            g.xadj[i_xadj + 1] = g.xadj[i_xadj] + v.second.size();
+            ++i_xadj;
+        }
+
+        g.adjncy.resize(g.xadj.back());
+        g.adjwgt.resize(g.xadj.back());
+        i_xadj = 0;
+        for (auto& v : adj_cells)
+        {
+            std::copy(v.second.begin(), v.second.end(), g.adjncy.begin() + g.xadj[i_xadj]);
+            ++i_xadj;
+        }
+
+        i_xadj = 0;
+        for (auto& v : adj_weights)
+        {
+            std::copy(v.second.begin(), v.second.end(), g.adjwgt.begin() + g.xadj[i_xadj]);
+            ++i_xadj;
+        }
+
+        std::cout << data_size << " " << g.xadj.back() << std::endl;
+        // assert(data_size == g.xadj.back());
+        g.xyz.reserve(mesh.nb_cells() * dim);
+        for_each_cell(mesh,
+                      [&](const auto& cell)
+                      {
+                          auto center = cell.center();
+                          std::copy(center.begin(), center.end(), std::back_inserter(g.xyz));
+                      });
+        return g;
+    }
+}
diff --git a/include/samurai/load_balancing_void.hpp b/include/samurai/load_balancing_void.hpp
new file mode 100644
index 000000000..06f896bb8
--- /dev/null
+++ b/include/samurai/load_balancing_void.hpp
@@ -0,0 +1,52 @@
+/**
+ * Empty class, used for test to compare with and without load balancing. 
+ *  
+*/
+
+#pragma once
+
+#include <map>
+#include "load_balancing.hpp"
+
+#ifdef SAMURAI_WITH_MPI
+template<int dim>
+class Void_LoadBalancer: public samurai::LoadBalancer<Void_LoadBalancer<dim>> {
+
+    private:
+        int _ndomains;
+        int _rank;
+
+    public:
+
+        Void_LoadBalancer() {
+#ifdef SAMURAI_WITH_MPI
+            boost::mpi::communicator world;
+            _ndomains = world.size();
+            _rank     = world.rank();
+#else
+            _ndomains = 1;
+            _rank     = 0;
+#endif
+        }
+
+        inline std::string getName() const { return "Void_LB"; }
+
+        template <class Mesh_t>
+        bool require_balance_impl( [[maybe_unused]] Mesh_t & mesh ) { return false; }
+
+        template<class Mesh_t>
+        auto reordering_impl( Mesh_t & mesh ) { 
+            auto flags = samurai::make_field<int, 1>("balancing_flags", mesh);
+            flags.fill( _rank );
+            return flags;
+        }
+
+        template<class Mesh_t>
+        auto load_balance_impl( Mesh_t & mesh ){
+            auto flags = samurai::make_field<int, 1>("balancing_flags", mesh);
+            flags.fill( _rank );
+            return flags;
+        }
+
+};
+#endif
diff --git a/include/samurai/mesh.hpp b/include/samurai/mesh.hpp
index 4f8387551..207661b6b 100644
--- a/include/samurai/mesh.hpp
+++ b/include/samurai/mesh.hpp
@@ -104,6 +104,8 @@ namespace samurai
         bool is_periodic(std::size_t d) const;
         const std::array<bool, dim>& periodicity() const;
         // std::vector<int>& neighbouring_ranks();
+
+        const std::vector<mpi_subdomain_t>& mpi_neighbourhood() const;
         std::vector<mpi_subdomain_t>& mpi_neighbourhood();
 
         void swap(Mesh_base& mesh) noexcept;
@@ -132,12 +134,16 @@ namespace samurai
         void update_mesh_neighbour();
         void to_stream(std::ostream& os) const;
 
+        void merge(ca_type& lca);
+        void remove(ca_type& lca);
+
       protected:
 
         using derived_type = D;
 
         Mesh_base() = default; // cppcheck-suppress uninitMemberVar
         Mesh_base(const cl_type& cl, const self_type& ref_mesh);
+        Mesh_base(const ca_type& ca, const self_type& ref_mesh);
         Mesh_base(const cl_type& cl, std::size_t min_level, std::size_t max_level);
         Mesh_base(const samurai::Box<double, dim>& b,
                   std::size_t start_level,
@@ -153,6 +159,9 @@ namespace samurai
                   double approx_box_tol = lca_type::default_approx_box_tol,
                   double scaling_factor = 0);
 
+        // Used for load balancing
+        Mesh_base(const cl_type& cl, std::size_t min_level, std::size_t max_level, std::vector<mpi_subdomain_t>& neighbourhood);
+
         derived_type& derived_cast() & noexcept;
         const derived_type& derived_cast() const& noexcept;
         derived_type derived_cast() && noexcept;
@@ -194,7 +203,7 @@ namespace samurai
             ar& m_subdomain;
             ar& m_union;
             ar& m_min_level;
-            ar& m_min_level;
+            ar& m_max_level;
         }
 #endif
     };
@@ -233,7 +242,6 @@ namespace samurai
 
 #ifdef SAMURAI_WITH_MPI
         partition_mesh(start_level, b);
-        // load_balancing();
 #else
         this->m_cells[mesh_id_t::cells][start_level] = {start_level, b, approx_box_tol, scaling_factor_};
 #endif
@@ -277,6 +285,8 @@ namespace samurai
 
         set_origin_point(origin_point());
         set_scaling_factor(scaling_factor());
+        update_mesh_neighbour();
+
     }
 
     template <class D, class Config>
@@ -292,7 +302,7 @@ namespace samurai
         construct_subdomain();
         m_domain = m_subdomain;
         construct_union();
-        update_sub_mesh();
+        update_sub_mesh(); // MPI AllReduce inside
         renumbering();
         update_mesh_neighbour();
 
@@ -321,6 +331,48 @@ namespace samurai
         set_scaling_factor(ref_mesh.scaling_factor());
     }
 
+    template <class D, class Config>
+    inline Mesh_base<D, Config>::Mesh_base(const ca_type& ca, const self_type& ref_mesh)
+        : m_domain(ref_mesh.m_domain)
+        , m_min_level(ref_mesh.m_min_level)
+        , m_max_level(ref_mesh.m_max_level)
+        , m_periodic(ref_mesh.m_periodic)
+        , m_mpi_neighbourhood(ref_mesh.m_mpi_neighbourhood)
+
+    {
+        m_cells[mesh_id_t::cells] = ca;
+
+        construct_subdomain();
+        construct_union();
+        update_sub_mesh();
+        renumbering();
+        update_mesh_neighbour();
+    }
+
+    template <class D, class Config>
+    inline Mesh_base<D, Config>::Mesh_base(const cl_type& cl,
+                                           std::size_t min_level,
+                                           std::size_t max_level,
+                                           std::vector<mpi_subdomain_t>& neighbourhood)
+        : m_min_level(min_level)
+        , m_max_level(max_level)
+        , m_mpi_neighbourhood(neighbourhood)
+    {
+        m_periodic.fill(false);
+        assert(min_level <= max_level);
+
+        // what to do with m_domain ?
+        m_domain = m_subdomain;
+
+        m_cells[mesh_id_t::cells] = {cl, false};
+
+        construct_subdomain();   // required ?
+        construct_union();       // required ?
+        update_sub_mesh();       // perform MPI allReduce calls
+        renumbering();           // required ?
+        update_mesh_neighbour(); // required to do that here ??
+    }
+
     template <class D, class Config>
     inline auto Mesh_base<D, Config>::cells() -> mesh_t&
     {
@@ -506,6 +558,12 @@ namespace samurai
         return m_periodic;
     }
 
+    template <class D, class Config>
+    inline auto Mesh_base<D, Config>::mpi_neighbourhood() const -> const std::vector<mpi_subdomain_t>&
+    {
+        return m_mpi_neighbourhood;
+    }
+
     template <class D, class Config>
     inline auto Mesh_base<D, Config>::mpi_neighbourhood() -> std::vector<mpi_subdomain_t>&
     {
@@ -667,7 +725,8 @@ namespace samurai
         int product_of_sizes     = 1;
         for (std::size_t d = 0; d < dim - 1; ++d)
         {
-            sizes[d] = static_cast<int>(floor(pow(size, 1. / dim) * global_box.length()[d] / length_harmonic_avg));
+            sizes[d] = std::max(static_cast<int>(floor(pow(size, 1. / dim) * global_box.length()[d] / length_harmonic_avg)), 1);
+
             product_of_sizes *= sizes[d];
         }
         sizes[dim - 1] = size / product_of_sizes;
@@ -715,6 +774,8 @@ namespace samurai
         this->m_cells[mesh_id_t::cells][start_level] = {start_level, subdomain_box};
         */
 
+	/** delete for git rebase
+	 **/
         lcl_type subdomain_cells(start_level, m_domain.origin_point(), m_domain.scaling_factor());
         auto subdomain_nb_intervals = m_domain.nb_intervals() / static_cast<std::size_t>(size);
         auto subdomain_start        = static_cast<std::size_t>(rank) * subdomain_nb_intervals;
@@ -732,6 +793,9 @@ namespace samurai
                               });
 
         this->m_cells[mesh_id_t::cells][start_level] = subdomain_cells;
+	// end comment for git rebase
+        // this->m_cells[mesh_id_t::cells][start_level] = {start_level, subdomain_box}; 
+	
 
         m_mpi_neighbourhood.reserve(static_cast<std::size_t>(size) - 1);
         for (int ir = 0; ir < size; ++ir)
@@ -746,110 +810,81 @@ namespace samurai
         // m_mpi_neighbourhood.reserve(static_cast<std::size_t>(pow(3, dim) - 1));
         // auto neighbour = [&](xt::xtensor_fixed<int, xt::xshape<dim>> shift)
         // {
-        //     auto neighbour_rank            = rank;
-        //     int product_of_preceding_sizes = 1;
-        //     for (std::size_t d = 0; d < dim; ++d)
-        //     {
-        //         neighbour_rank += product_of_preceding_sizes * shift[d];
-        //         product_of_preceding_sizes *= sizes[d];
-        //     }
-        //     return neighbour_rank;
-        // };
-
-        // static_nested_loop<dim, -1, 2>(
-        //     [&](auto& shift)
-        //     {
-        //         if (xt::any(shift))
-        //         {
-        //             for (std::size_t d = 0; d < dim; ++d)
-        //             {
-        //                 if (coords[d] + shift[d] < 0 || coords[d] + shift[d] >= sizes[d])
-        //                 {
-        //                     return;
-        //                 }
-        //             }
-        //             m_mpi_neighbourhood.push_back(neighbour(shift));
-        //         }
-        //     });
-
+        //    auto neighbour_rank            = rank;
+        //    int product_of_preceding_sizes = 1;
+        //    for (std::size_t d = 0; d < dim; ++d)
+        //    {
+        //        neighbour_rank += product_of_preceding_sizes * shift[d];
+        //        product_of_preceding_sizes *= sizes[d];
+        //    }
+        //    return neighbour_rank;
+        //};
+
+        //static_nested_loop<dim, -1, 2>(
+        //    [&](auto& shift)
+        //    {
+        //        if (xt::any(shift))
+        //        {
+        //            for (std::size_t d = 0; d < dim; ++d)
+        //            {
+        //                if (coords[d] + shift[d] < 0 || coords[d] + shift[d] >= sizes[d])
+        //                {
+        //                    return;
+        //                }
+        //            }
+        //            m_mpi_neighbourhood.push_back(neighbour(shift));
+        //        }
+         //   });
 #endif
     }
 
     template <class D, class Config>
-    void Mesh_base<D, Config>::load_balancing()
+    void Mesh_base<D, Config>::merge(ca_type& lca)
     {
-#ifdef SAMURAI_WITH_MPI
-        mpi::communicator world;
-        auto rank = world.rank();
+        // merge received cells
 
-        std::size_t load = nb_cells(mesh_id_t::cells);
-        std::vector<std::size_t> loads;
+        auto& refmesh = this->m_cells[mesh_id_t::cells];
 
-        std::vector<double> load_fluxes(m_mpi_neighbourhood.size(), 0);
+        auto minlevel = std::min(refmesh.min_level(), lca.min_level());
+        auto maxlevel = std::max(refmesh.max_level(), lca.max_level());
 
-        const std::size_t n_iterations = 1;
-
-        for (std::size_t k = 0; k < n_iterations; ++k)
+        cl_type cl;
+        for (size_t ilvl = minlevel; ilvl <= maxlevel; ++ilvl)
         {
-            world.barrier();
-            if (rank == 0)
-            {
-                std::cout << "---------------- k = " << k << " ----------------" << std::endl;
-            }
-            mpi::all_gather(world, load, loads);
-
-            std::vector<std::size_t> nb_neighbours;
-            mpi::all_gather(world, m_mpi_neighbourhood.size(), nb_neighbours);
+            auto un = samurai::union_(refmesh[ilvl], lca[ilvl]);
 
-            double load_np1 = static_cast<double>(load);
-            for (std::size_t i_rank = 0; i_rank < m_mpi_neighbourhood.size(); ++i_rank)
-            {
-                auto neighbour = m_mpi_neighbourhood[i_rank];
-
-                auto neighbour_load = loads[static_cast<std::size_t>(neighbour.rank)];
-                int neighbour_load_minus_my_load;
-                if (load < neighbour_load)
+            un(
+                [&](auto& interval, auto& indices)
                 {
-                    neighbour_load_minus_my_load = static_cast<int>(neighbour_load - load);
-                }
-                else
-                {
-                    neighbour_load_minus_my_load = -static_cast<int>(load - neighbour_load);
-                }
-                double weight       = 1. / std::max(m_mpi_neighbourhood.size(), nb_neighbours[neighbour.rank]);
-                load_fluxes[i_rank] = weight * neighbour_load_minus_my_load;
-                load_np1 += load_fluxes[i_rank];
-            }
-            load_np1 = floor(load_np1);
-
-            load_transfer(load_fluxes);
-
-            std::cout << rank << ": load = " << load << ", load_np1 = " << load_np1 << std::endl;
-
-            load = static_cast<std::size_t>(load_np1);
+                    cl[ilvl][indices].add_interval(interval);
+                });
         }
-#endif
+
+        refmesh = {cl, false};
     }
 
     template <class D, class Config>
-    void Mesh_base<D, Config>::load_transfer([[maybe_unused]] const std::vector<double>& load_fluxes)
+    void Mesh_base<D, Config>::remove(ca_type& lca)
     {
-#ifdef SAMURAI_WITH_MPI
-        mpi::communicator world;
-        std::cout << world.rank() << ": ";
-        for (std::size_t i_rank = 0; i_rank < m_mpi_neighbourhood.size(); ++i_rank)
+        auto& refmesh = this->m_cells[mesh_id_t::cells];
+
+        // remove cells
+        cl_type cl;
+        size_t diff_ncells = 0;
+        for (size_t ilvl = refmesh.min_level(); ilvl <= refmesh.max_level(); ++ilvl)
         {
-            auto neighbour = m_mpi_neighbourhood[i_rank];
-            if (load_fluxes[i_rank] < 0) // must tranfer load to the neighbour
-            {
-            }
-            else if (load_fluxes[i_rank] > 0) // must receive load from the neighbour
-            {
-            }
-            std::cout << "--> " << neighbour.rank << ": " << load_fluxes[i_rank] << ", ";
+            auto diff = samurai::difference(refmesh[ilvl], lca[ilvl]);
+
+            diff(
+                [&](auto& interval, auto& index)
+                {
+                    cl[ilvl][index].add_interval(interval);
+                    diff_ncells += interval.size();
+                });
         }
-        std::cout << std::endl;
-#endif
+
+        // new mesh for current process
+        refmesh = {cl, false};
     }
 
     template <class D, class Config>
diff --git a/include/samurai/morton.hpp b/include/samurai/morton.hpp
new file mode 100644
index 000000000..da29d3d79
--- /dev/null
+++ b/include/samurai/morton.hpp
@@ -0,0 +1,155 @@
+#pragma once
+
+#include <vector>
+#include <cstdint>
+
+#include "sfc.hpp"
+
+class Morton : public SFC<Morton> {
+
+    private:
+
+        /*
+        * Split bit by 3, version for 2D, used to compute morton index
+        *
+        * Parameters:
+        *             logical_a : coordonnées logiques (i ou j)
+        *
+        * return :
+        *              x        : unsigned 64bits with splited by 3
+        */
+        inline SFC_key_t splitBy3_2D( uint32_t logical_a ) const {
+
+            SFC_key_t x = logical_a & 0xffffffff;
+            x = (x | x << 16) & 0xffff0000ffff;
+            x = (x | x << 8) & 0xff00ff00ff00ff;
+            x = (x | x << 4) & 0xf0f0f0f0f0f0f0f;
+            x = (x | x << 2) & 0x3333333333333333;
+            x = (x | x << 1) & 0x5555555555555555;
+
+            return x;
+        }
+
+        /*
+        * Split bit by 3, version for 3D, used to compute morton index
+        *
+        * Parameters:
+        *             logical_a : coordonnées logiques (i ou j ou k)
+        *
+        * return :
+        *              x        : unsigned 64bits with splited by 3
+        */
+        inline SFC_key_t splitBy3_3D(uint32_t logical_a) const {
+
+            SFC_key_t x = logical_a & 0x1fffff;
+
+            x = (x | x << 32) & 0x1f00000000ffff;
+            x = (x | x << 16) & 0x1f0000ff0000ff;
+            x = (x | x <<  8) & 0x100f00f00f00f00f;
+            x = (x | x <<  4) & 0x10c30c30c30c30c3;
+            x = (x | x <<  2) & 0x1249249249249249;
+
+            return x;
+
+        }
+
+
+    public:
+
+        inline std::string getName() const { return "Morton"; }
+
+        /*
+        * Return the morton index from logical coordinate (i,j) or (i,j,k)
+        *
+        * Parameters:
+        *             lc : structure containing logical coordinate
+        *
+        * Return :
+        *             key    : morton key
+        */
+        template<class Coord_t>
+        inline SFC_key_t getKey_2D_impl( const Coord_t & lc ) const {
+            SFC_key_t la_clef = 0;
+            la_clef |= splitBy3_2D( lc( 0 ) ) | splitBy3_2D( lc( 1 ) ) << 1;
+            return la_clef;
+        }
+
+        template<class Coord_t>
+        inline SFC_key_t getKey_3D_impl( const Coord_t & lc ) const {
+            SFC_key_t la_clef = 0;
+            la_clef |= splitBy3_3D( lc( 0 ) ) | splitBy3_3D( lc( 1 ) ) << 1 | splitBy3_3D( lc( 2 ) ) << 2;
+            return la_clef;
+        }
+
+        /*
+        * Return the logical coordinate (i,j) or (i,j,k) from the morton index
+        *
+        * Parameters:
+        *             clef (in): morton index
+        *
+        * Return :
+        *             lc  (out): 2D/3D logical coordinates
+        */
+        inline auto getCoordinates_2D_impl( const SFC_key_t & clef ) const {
+
+            xt::xtensor_fixed<uint32_t, xt::xshape<2>> lc = { 0, 0 };
+
+            // Extract coord i
+            SFC_key_t keyi = clef >> 0;
+            keyi &= 0x5555555555555555;
+            keyi = (keyi ^ (keyi >> 1))  & 0x3333333333333333;
+            keyi = (keyi ^ (keyi >> 2))  & 0x0f0f0f0f0f0f0f0f;
+            keyi = (keyi ^ (keyi >> 4))  & 0x00ff00ff00ff00ff;
+            keyi = (keyi ^ (keyi >> 8))  & 0x0000ffff0000ffff;
+            keyi = (keyi ^ (keyi >> 16)) & 0x00000000ffffffff;
+            lc( 0 ) = static_cast<uint32_t>( keyi );
+
+            // extract coord j
+            SFC_key_t keyj = clef >> 1;
+            keyj &= 0x5555555555555555;
+            keyj = (keyj ^ (keyj >> 1))  & 0x3333333333333333;
+            keyj = (keyj ^ (keyj >> 2))  & 0x0f0f0f0f0f0f0f0f;
+            keyj = (keyj ^ (keyj >> 4))  & 0x00ff00ff00ff00ff;
+            keyj = (keyj ^ (keyj >> 8))  & 0x0000ffff0000ffff;
+            keyj = (keyj ^ (keyj >> 16)) & 0x00000000ffffffff;
+            lc( 1 ) = static_cast<uint32_t>( keyj );
+
+            return lc;
+        }
+
+        inline auto getCoordinates_3D_impl( const SFC_key_t & clef ) const {
+
+            xt::xtensor_fixed<uint32_t, xt::xshape<3>> lc = { 0, 0, 0 };
+
+            // Extract coord i
+            SFC_key_t keyi = clef >> 0;
+            keyi &= 0x1249249249249249;
+            keyi = (keyi ^ (keyi >> 2))  & 0x30c30c30c30c30c3;
+            keyi = (keyi ^ (keyi >> 4))  & 0xf00f00f00f00f00f;
+            keyi = (keyi ^ (keyi >> 8))  & 0x00ff0000ff0000ff;
+            keyi = (keyi ^ (keyi >> 16)) & 0x00ff00000000ffff;
+            keyi = (keyi ^ (keyi >> 32)) & 0x1fffff;
+            lc( 0 ) = static_cast<uint32_t>( keyi ); // assert for overflow ?
+
+            SFC_key_t keyj = clef >> 1;
+            keyj &= 0x1249249249249249;
+            keyj = (keyj ^ (keyj >> 2))  & 0x30c30c30c30c30c3;
+            keyj = (keyj ^ (keyj >> 4))  & 0xf00f00f00f00f00f;
+            keyj = (keyj ^ (keyj >> 8))  & 0x00ff0000ff0000ff;
+            keyj = (keyj ^ (keyj >> 16)) & 0x00ff00000000ffff;
+            keyj = (keyj ^ (keyj >> 32)) & 0x1fffff;
+            lc( 1 ) = static_cast<uint32_t>( keyj );
+
+            SFC_key_t keyk = clef >> 2;
+            keyk &= 0x1249249249249249;
+            keyk = (keyk ^ (keyk >> 2))  & 0x30c30c30c30c30c3;
+            keyk = (keyk ^ (keyk >> 4))  & 0xf00f00f00f00f00f;
+            keyk = (keyk ^ (keyk >> 8))  & 0x00ff0000ff0000ff;
+            keyk = (keyk ^ (keyk >> 16)) & 0x00ff00000000ffff;
+            keyk = (keyk ^ (keyk >> 32)) & 0x1fffff;
+            lc( 2 ) = static_cast<uint32_t>( keyk );
+
+            return lc;
+        }
+
+};
diff --git a/include/samurai/mr/mesh.hpp b/include/samurai/mr/mesh.hpp
index 2ab62fbb4..54a6d633f 100644
--- a/include/samurai/mr/mesh.hpp
+++ b/include/samurai/mr/mesh.hpp
@@ -73,6 +73,7 @@ namespace samurai
 
         MRMesh() = default;
         MRMesh(const cl_type& cl, const self_type& ref_mesh);
+        MRMesh(const ca_type& ca, const self_type& ref_mesh);
         MRMesh(const cl_type& cl, std::size_t min_level, std::size_t max_level);
         MRMesh(const samurai::Box<double, dim>& b,
                std::size_t min_level,
@@ -85,6 +86,13 @@ namespace samurai
                const std::array<bool, dim>& periodic,
                double approx_box_tol = lca_type::default_approx_box_tol,
                double scaling_factor = 0);
+        // Used for load balancing, finally commented
+        //MRMesh(const cl_type & cl, 
+	//	std::size_t min_level,
+	//      std::size_t max_level, 
+	//	std::vector<mpi_subdomain_t> & neighbourhood,
+	//	double approx_box_tol = lca_type::default_approx_box_tol,
+	//	double scaling_factor = 0);
 
         void update_sub_mesh_impl();
 
@@ -98,6 +106,12 @@ namespace samurai
     {
     }
 
+    template <class Config>
+    inline MRMesh<Config>::MRMesh(const ca_type& ca, const self_type& ref_mesh)
+        : base_type(ca, ref_mesh)
+    {
+    }
+
     template <class Config>
     inline MRMesh<Config>::MRMesh(const cl_type& cl, std::size_t min_level, std::size_t max_level)
         : base_type(cl, min_level, max_level)
@@ -125,6 +139,13 @@ namespace samurai
     {
     }
 
+    // template <class Config>
+    // inline MRMesh<Config>::MRMesh( const cl_type & cl, std::size_t min_level, std::size_t max_level, 
+    //                                 std::vector<mpi_subdomain_t> & neighbourhood)
+    //     : base_type(cl, min_level, max_level, neighbourhood )
+    // {
+    // }
+
     template <class Config>
     inline void MRMesh<Config>::update_sub_mesh_impl()
     {
diff --git a/include/samurai/samurai.hpp b/include/samurai/samurai.hpp
index f1a449d6f..bb04345af 100644
--- a/include/samurai/samurai.hpp
+++ b/include/samurai/samurai.hpp
@@ -31,6 +31,7 @@ namespace samurai
 
 #ifdef SAMURAI_WITH_MPI
         MPI_Init(&argc, &argv);
+        boost::mpi::communicator world;
 #endif
         times::timers.start("total runtime");
         return app;
@@ -46,6 +47,7 @@ namespace samurai
 #ifdef SAMURAI_WITH_MPI
         MPI_Init(nullptr, nullptr);
 #endif
+        times::timers.start("smr::total_runtime");
     }
 
     inline void finalize()
diff --git a/include/samurai/sfc.hpp b/include/samurai/sfc.hpp
new file mode 100644
index 000000000..cd2561b28
--- /dev/null
+++ b/include/samurai/sfc.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <cstdint>
+
+#include "assertLogTrace.hpp"
+
+using SFC_key_t = uint64_t;
+
+template<class SFC_Flavor>
+class SFC {
+
+    public:
+        
+        /**
+         * 
+         * Interface to compute Space Filling Curve 1D index from
+         * 2D/3D logical coordinates of cell.
+         * 
+        */
+        template<int dim, class Coord_t>
+        inline SFC_key_t getKey( const Coord_t & lc ) const{
+            if constexpr ( dim == 2 ) return static_cast<const SFC_Flavor*>( this )->getKey_2D_impl( lc );
+            if constexpr ( dim == 3 ) return static_cast<const SFC_Flavor*>( this )->getKey_3D_impl( lc );
+        }
+
+        /**
+         * 
+         * Interface to compute logical coordinate from Space Filling Curve 1D index.
+         * 
+        */
+        template<int dim>
+        inline auto getCoordinates( const SFC_key_t & clef ) const{
+            if constexpr ( dim == 2 ) return static_cast<const SFC_Flavor*>( this )->getCoordinates_2D_impl( clef );
+            if constexpr ( dim == 3 ) return static_cast<const SFC_Flavor*>( this )->getCoordinates_3D_impl( clef );
+        }
+
+        inline auto getName() const -> std::string {
+            return static_cast<const SFC_Flavor*>( this )->getName();
+        }
+};
\ No newline at end of file
diff --git a/include/samurai/statistics.hpp b/include/samurai/statistics.hpp
index 14a674ddb..4e58aa676 100644
--- a/include/samurai/statistics.hpp
+++ b/include/samurai/statistics.hpp
@@ -14,16 +14,18 @@ namespace samurai
 #if defined(WITH_STATS)
     struct Statistics
     {
-        Statistics(const std::string& filename, int save_all = 10)
-            : filename(filename)
-            , save_all(save_all)
+        Statistics( const std::string & filename, std::size_t save_all = 10)
+            : _outfile( filename )
             , icurrent(0)
+            , save_all(save_all)
         {
         }
 
         template <class Mesh>
-        void operator()(std::string test_case, const Mesh& mesh)
+        void operator()( std::string test_case, Mesh& mesh )
         {
+
+
             icurrent++;
             using mesh_id_t = typename Mesh::mesh_id_t;
             auto ca         = mesh[mesh_id_t::cells];
@@ -32,6 +34,10 @@ namespace samurai
             std::size_t min_level = ca.min_level();
             std::size_t max_level = ca.max_level();
 
+            size_t n_neighbours = mesh.mpi_neighbourhood().size();
+            size_t n_cells_l    = mesh.nb_cells( Mesh::mesh_id_t::cells );
+            size_t n_cells_g    = mesh.nb_cells( Mesh::mesh_id_t::reference ) - mesh.nb_cells( Mesh::mesh_id_t::cells );
+
             auto comp = [](const auto& a, const auto& b)
             {
                 return a.size() < b.size();
@@ -72,6 +78,9 @@ namespace samurai
                 stats[test_case].push_back({
                     {"min_level", min_level},
                     {"max_level", max_level},
+                    {"n_neighbours", n_neighbours},
+                    {"n_cells_g", n_cells_g}, // ghost ?
+                    {"n_cells_l", n_cells_l}, // leaves
                     {"by_level",  by_level }
                 });
             }
@@ -81,6 +90,9 @@ namespace samurai
                 out.push_back({
                     {"min_level", min_level},
                     {"max_level", max_level},
+                    {"n_neighbours", n_neighbours},
+                    {"n_cells_g", n_cells_g}, // ghost ?
+                    {"n_cells_l", n_cells_l}, // leaves
                     {"by_level",  by_level }
                 });
                 stats[test_case] = out;
@@ -88,7 +100,7 @@ namespace samurai
 
             if (icurrent == save_all)
             {
-                std::ofstream ofile(filename);
+                std::ofstream ofile( _outfile );
                 ofile << std::setw(4) << stats << std::endl;
                 icurrent = 0;
             }
@@ -96,17 +108,16 @@ namespace samurai
 
         ~Statistics()
         {
-            std::ofstream file(filename);
+            std::ofstream file( _outfile );
             file << std::setw(4) << stats;
         }
 
-        std::string filename;
+        std::string _outfile;
         json stats;
         std::size_t icurrent;
-        int save_all;
+        std::size_t save_all;
     };
 
-    auto statistics = Statistics("stats.json");
 #else
     template <class Mesh>
     void statistics(const std::string& test_case, const Mesh& mesh){};
diff --git a/include/samurai/timers.hpp b/include/samurai/timers.hpp
index e3ab002a5..90b764e5a 100644
--- a/include/samurai/timers.hpp
+++ b/include/samurai/timers.hpp
@@ -297,3 +297,4 @@ namespace samurai
 
     }
 }
+
diff --git a/python/read_stats.py b/python/read_stats.py
index adb4f9193..1f13c2337 100644
--- a/python/read_stats.py
+++ b/python/read_stats.py
@@ -1,15 +1,85 @@
+
+#
+#
+# categories = ['A', 'B', 'C', 'D']
+# valeurs_moyennes = [3, 7, 5, 9]
+# valeurs_min = [2, 5, 4, 8]
+# valeurs_max = [4, 9, 6, 10]
+
+# # Création du graphique
+# plt.figure(figsize=(10, 6))
+
+# # Tracé des barres
+# plt.bar(categories, valeurs_moyennes, yerr=[(v_min, v_max) for v_min, v_max in zip(valeurs_min, valeurs_max)], capsize=5)
+
+import numpy
+import argparse
 import pandas as pd
 import matplotlib.pyplot as plt
 
-test_case = "D2Q4444_Euler_Lax_Liu"
+# arguments parser
+CLI = argparse.ArgumentParser()
+
+# list of PDF files
+CLI.add_argument("--file", type=str,  default=[""], help="stats file")
 
-data = pd.read_json('stats.json')
+# parse the command line
+args = CLI.parse_args()
+
+test_case = "stats"
+
+data = pd.read_json( args.file )
 data = pd.json_normalize(data[test_case])
 
+print(type(print(data.min_level)))
+print(data.min_level)
+
 min_level = data.min_level.min()
 max_level = data.max_level.max()
 levels = range(min_level, max_level+1)
 
+print("> Levels: {}".format(levels))
+
+def plot_minmax( suffix, title, xlabel, ylabel, ax, level_range, kind='box', legend=None):
+    """
+        level_range (in) : range of levels (min_level, max_level)
+    """
+    print("\t> [plot_minmax] Entering .. ")
+    # keys in dict. of data
+    _tmp_fields = []
+    _tmp_name   = []
+    _minmaxave = {}
+    for elem in ["min", "max", "ave"]:
+        _fields = [f'by_level.{l:02}.{suffix}.{elem}' for l in level_range]
+        
+        _minmaxave[elem] =  data[ _fields ].to_numpy()[0]
+
+        _tmp_fields.append( _fields )
+        _tmp_name.append( {f: l for f, l in zip(_fields, levels)} )
+    
+    # ax.set_xlabel(xlabel)
+    # ax.set_ylabel(ylabel)
+    ax.set_xlim( level_range.start-1, level_range.stop )
+    ax.set_title(title, fontweight="bold", fontsize=10)
+
+    x = numpy.arange(min_level, max_level+1)
+    spacing = 0.3  # spacing between hat groups
+    width = (1 - spacing) / x.shape[0]
+    ax.set_xticks( x, labels=x )
+    
+    ax.errorbar( x, _minmaxave["ave"], yerr=[_minmaxave["ave"]-_minmaxave["min"], _minmaxave["max"]-_minmaxave["ave"]], \
+                 ls='None', marker='o', ms=6, capsize=5, c="b" )
+    
+    # heights0 = values[0]
+    # for i, (heights, group_label) in enumerate(zip(values, group_labels)):
+    #     style = {'fill': False} if i == 0 else {'edgecolor': 'black'}
+    #     rects = ax.bar(x - spacing/2 + i * width, heights - heights0,
+    #                    width, bottom=heights0, label=group_label, **style)
+    #     label_bars(heights, rects)
+
+    # print(to_plot)
+    # print(type(to_plot))
+
 def plot(suffix, title, xlabel, ylabel, ax, kind='box', legend=None, stacked=True):
     fields = [f'by_level.{l:02}.{suffix}' for l in levels]
     new_name = {f: l for f, l in zip(fields, levels)}
@@ -18,20 +88,35 @@ def plot(suffix, title, xlabel, ylabel, ax, kind='box', legend=None, stacked=Tru
     to_plot.plot(kind=kind, ax=ax, stacked=stacked)
     ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
-    ax.set_title(title, fontweight="bold")
+    ax.set_title(title, fontweight="bold", fontsize=10)
     if legend:
         ax.legend(title=legend)
 
 nrow, ncol = 3, 4
 fig = plt.figure(figsize=(5*ncol, 5*nrow))
 
-plot("cells", "Number of cells per level", "Time iteration", "number of cells", fig.add_subplot(nrow, 2, 1), kind='area', legend="Level")
+plot("cells", "Number of cells per level", "Time iteration", "number of cells", fig.add_subplot(nrow, 2, 1), kind="box", legend="Level")
+
+plot("axis-0.number of intervals", "Number of intervals per level in x-axis", "Time iteration", \
+     "number of intervals", fig.add_subplot(nrow, 2, 3), kind='box', legend="Level", stacked=False)
+
+plot("axis-1.number of intervals", "Number of intervals per level in y-axis", "Time iteration", \
+     "number of intervals", fig.add_subplot(nrow, 2, 5), kind='box', legend="Level", stacked=False)
+
+# plot min/max numbers of cells per level
+# plot("axis-0.cells per interval.min", "Minimum of cells per interval\n in x-axis", "level", \
+#      "number of cells", fig.add_subplot(nrow, ncol, 3))
+
+# plot("axis-0.cells per interval.max", "Maximum of cells per interval\n in x-axis", "level", \
+#      "number of cells", fig.add_subplot(nrow, ncol, 4))
 
-plot("axis-0.number of intervals", "Number of intervals per level in x-axis", "Time iteration", "number of intervals", fig.add_subplot(nrow, 2, 3), kind='area', legend="Level", stacked=False)
-plot("axis-1.number of intervals", "Number of intervals per level in y-axis", "Time iteration", "number of intervals", fig.add_subplot(nrow, 2, 5), kind='area', legend="Level", stacked=False)
+ax = fig.add_subplot(nrow, ncol, 3)
+plot_minmax("axis-0.cells per interval", "[Min, Max, Ave] Cells per interval in x-axis", "Level", \
+            "number of cells", fig.add_subplot(nrow, ncol, 3), level_range=levels, kind='box', legend="Level")
+plot_minmax("axis-1.cells per interval", "[Min, Max, Ave] Cells per interval in y-axis", "Level", \
+            "number of cells", fig.add_subplot(nrow, ncol, 4), level_range=levels, kind='box', legend="Level")
+# ax.bar(numpy.arange(min_level, max_level+1), valeurs_moyennes, yerr=[(v_min, v_max) for v_min, v_max in zip(valeurs_min, valeurs_max)], capsize=5)
 
-plot("axis-0.cells per interval.min", "Minimum of cells per interval\n in x-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 3))
-plot("axis-0.cells per interval.max", "Maximum of cells per interval\n in x-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 4))
 
 plot("axis-1.cells per interval.min", "Minimum of cells per interval\n in y-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 7))
 plot("axis-1.cells per interval.max", "Maximum of cells per interval\n in y-axis", "level", "number of cells", fig.add_subplot(nrow, ncol, 8))
diff --git a/python/stats_mpi.py b/python/stats_mpi.py
new file mode 100644
index 000000000..4f3337db6
--- /dev/null
+++ b/python/stats_mpi.py
@@ -0,0 +1,160 @@
+
+#
+#
+# categories = ['A', 'B', 'C', 'D']
+# valeurs_moyennes = [3, 7, 5, 9]
+# valeurs_min = [2, 5, 4, 8]
+# valeurs_max = [4, 9, 6, 10]
+
+# # Création du graphique
+# plt.figure(figsize=(10, 6))
+
+# # Tracé des barres
+# plt.bar(categories, valeurs_moyennes, yerr=[(v_min, v_max) for v_min, v_max in zip(valeurs_min, valeurs_max)], capsize=5)
+
+import numpy
+import re
+import argparse
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def plot_mma( data, suffix, ax, xlabel, ylabel, keys=None ):
+    """
+    """
+    
+    # select specific domaine if not None
+    if keys == None:
+        keys = data.keys()
+    
+    print("plot data for domaine {}".format(keys))
+
+    for mprank in keys:
+        levels = range( data[ mprank ].min_level.min(), data[ mprank ].max_level.max()+1 )
+
+        for elem in ["min", "max", "ave"]:
+            fields = [f'by_level.{l:02}.{suffix}.{elem}' for l in levels]
+            print("\t>[plot] Getting field: '{}'".format(fields))
+
+    #     x = numpy.arange( levels.start, levels.stop )
+    #     y = data[ mprank ][ fields ].to_numpy()[0]
+
+    #     ax.plot( x, y, ls='None', markersize=5, marker='o' )
+    
+    ax.set_xlabel( xlabel, fontsize=10 )
+    ax.set_ylabel( ylabel, fontsize=10 )
+
+def plot_by_level( data, suffix, ax, xlabel, ylabel, keys=None ):
+    """
+        Plot data level by level
+    """
+    
+    # select specific domaine if not None
+    if keys == None:
+        keys = data.keys()
+    
+    print("plot data for domaine {}".format(keys))
+
+    for mprank in keys:
+        levels = range( data[ mprank ].min_level.min(), data[ mprank ].max_level.max()+1 )
+        fields = [f'by_level.{l:02}.{suffix}' for l in levels]
+
+        x = numpy.arange( levels.start, levels.stop )
+        y = data[ mprank ][ fields ].to_numpy()[0]
+
+        ax.plot( x, y, ls='None', markersize=5, marker='o' )
+    
+    ax.set_xlabel( xlabel, fontsize=10 )
+    ax.set_ylabel( ylabel, fontsize=10 )
+
+
+def plot_single_quantity( data, suffix, ax, xlabel, ylabel, keys=None ):
+    """
+        Plot data level by level
+    """
+    
+    # select specific domaine if not None
+    if keys == None:
+        keys = list(data.keys())
+
+    rmin = int( min(keys) )
+    rmax = int( max(keys) )
+
+    x = numpy.zeros( len(keys) )
+    y = numpy.zeros( x.shape )
+
+    for id in range(0, len(keys)):
+        mprank = int( keys[ id ] )
+        x[ id ] = mprank
+        y[ id ] = data[ str(mprank) ][suffix].values[0]
+        print("\t>[plot] Process # {}, data single: '{}'".format( mprank, data[ str(mprank)][suffix].values ))
+
+    ax.plot( x, y, ls='None', markersize=5, marker='o' )
+    
+    ax.set_xlabel( xlabel, fontsize=10 )
+    ax.set_ylabel( ylabel, fontsize=10 )
+
+# arguments parser
+CLI = argparse.ArgumentParser()
+CLI.add_argument("--files", nargs="*", type=str,  default=[""], help="stats file")
+CLI.add_argument("--jtag", type=str, default="stats", help="json tag")
+
+# parse the command line
+args = CLI.parse_args()
+
+min_level = 99
+max_level = 0
+process_stats = {}
+for fin in args.files:
+
+    print("\t> Parsing file: '{}'".format( fin ))
+    fdata = pd.read_json( fin )
+    fdata = pd.json_normalize( fdata[ args.jtag ] )
+
+    match = re.search(r'process_(\d+)', fin)
+    if match:
+        rank = match.group(1)
+    else:
+        rank = 0
+    
+    # compute min/max level overall datasets
+    min_level = min( min_level, fdata.min_level.min() )
+    max_level = max( max_level, fdata.max_level.max() )
+
+    assert rank not in process_stats.keys(), "statistics for process {} exists".format(rank)
+    process_stats[ rank ] = fdata
+
+
+nrow, ncol = 3, 2
+fig = plt.figure(figsize=(5*ncol, 5*nrow))
+
+# plot("cells", "Number of cells per level", "Level", "number of cells",  )
+
+ax = fig.add_subplot(nrow, ncol, 1)
+plot_by_level( process_stats, "cells", ax, xlabel="level", ylabel="Number of cells" )
+ax.set_xlim( min_level-1, max_level+1 )
+ax.set_title( "Number of cells", fontweight="bold", fontsize=10 )
+x = numpy.arange(min_level, max_level+1)
+ax.set_xticks( x, labels=x )
+
+ax = fig.add_subplot(nrow, ncol, 2)
+plot_by_level( process_stats, "axis-0.number of intervals", ax, xlabel="level", ylabel="Number of intervals" )
+ax.set_xlim( min_level-1, max_level+1 )
+ax.set_title( "Number of intervals", fontweight="bold", fontsize=10 )
+x = numpy.arange(min_level, max_level+1)
+ax.set_xticks( x, labels=x )
+
+ax = fig.add_subplot(nrow, ncol, 3)
+plot_single_quantity( process_stats, "n_neighbours", ax, xlabel="MPI rank", ylabel="Number of neighbours" )
+ax.set_title( "Number of geometrical neighbours", fontweight="bold", fontsize=10 )
+# x = numpy.arange(min_level, max_level+1)
+# ax.set_xticks( x, labels=x )
+
+ax = fig.add_subplot(nrow, ncol, 4)
+plot_mma( process_stats, "axis-0.cells per interval", ax, xlabel="level", ylabel="Number of cells" )
+ax.set_xlim( min_level-1, max_level+1 )
+ax.set_title( "Number of cells per interval x-axis", fontweight="bold", fontsize=10 )
+x = numpy.arange(min_level, max_level+1)
+ax.set_xticks( x, labels=x )
+
+
+plt.savefig( f"{args.jtag}.png", dpi=300 )
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6a4a5dd10..6de6d9666 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -28,6 +28,9 @@ set(SAMURAI_TESTS
     test_portion.cpp
     test_scaling.cpp
     test_utils.cpp
+    test_sfc.cpp
+    test_mrmesh.cpp
+    test_loadbalancing.cpp
 )
 
 if(rapidcheck_FOUND)
diff --git a/tests/main.cpp b/tests/main.cpp
index a3344fda4..9767eb035 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,13 +1,15 @@
-#ifdef SAMURAI_WITH_MPI
-#include <boost/mpi.hpp>
-#endif
 #include <gtest/gtest.h>
+#include <samurai/samurai.hpp>
 
 int main(int argc, char* argv[])
 {
-#ifdef SAMURAI_WITH_MPI
-    boost::mpi::environment env(argc, argv);
-#endif
+    samurai::initialize();
+
     ::testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
+
+    int ret = RUN_ALL_TESTS();
+
+    samurai::finalize();
+
+    return ret;
 }
diff --git a/tests/test_adapt.cpp b/tests/test_adapt.cpp
index 5ba4637e1..c56e1df2a 100644
--- a/tests/test_adapt.cpp
+++ b/tests/test_adapt.cpp
@@ -27,7 +27,7 @@ namespace samurai
 
     TYPED_TEST(adapt_test, mutliple_fields)
     {
-        ::samurai::initialize();
+        // ::samurai::initialize();
 
         static constexpr std::size_t dim = TypeParam::value;
         using config                     = MRConfig<dim>;
@@ -38,6 +38,7 @@ namespace samurai
 
         auto adapt = make_MRAdapt(u_1, u_2, u_3);
         adapt(1e-4, 2);
-        ::samurai::finalize();
+
+        // ::samurai::finalize();
     }
 }
diff --git a/tests/test_cell.cpp b/tests/test_cell.cpp
index 9893d47dd..49a00ef9c 100644
--- a/tests/test_cell.cpp
+++ b/tests/test_cell.cpp
@@ -50,4 +50,41 @@ namespace samurai
         xt::xarray<double> expected{.5, .5};
         EXPECT_EQ(c.corner(), expected);
     }
-}
+
+    /**
+     * Test of samurai::cell_length function (cell.hpp)
+    */
+    TEST(cell, cell_length){
+        EXPECT_DOUBLE_EQ(samurai::cell_length(1), 0.5);
+        EXPECT_DOUBLE_EQ(samurai::cell_length(3), 0.125);
+        EXPECT_DOUBLE_EQ(samurai::cell_length(20), 9.5367431640625e-07 );
+    }
+
+    /**
+     * Test of cell::face_center function (cell.hpp)
+    */
+    TEST(cell, face_center){
+        auto indices = xt::xtensor_fixed<int, xt::xshape<2>>({1, 1});
+        samurai::Cell<2, Interval<int>> c { 1, indices, 0 };
+
+        {
+            auto dir_x_p = xt::xtensor_fixed<int, xt::xshape<2>>({1, 0});
+            auto fxp = c.face_center( dir_x_p );
+            EXPECT_DOUBLE_EQ(  fxp(0), 1. );
+        }
+
+        {
+            auto dir_x_m = xt::xtensor_fixed<int, xt::xshape<2>>({-1, 0});
+            auto fxp = c.face_center( dir_x_m );
+            EXPECT_DOUBLE_EQ(  fxp(0), 0.5 );
+        }
+
+        {
+            auto dir_y_p = xt::xtensor_fixed<int, xt::xshape<2>>({0, 1});
+            auto fxp = c.face_center( dir_y_p );
+            EXPECT_DOUBLE_EQ(  fxp(1), 1. );
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/tests/test_loadbalancing.cpp b/tests/test_loadbalancing.cpp
new file mode 100644
index 000000000..bec339199
--- /dev/null
+++ b/tests/test_loadbalancing.cpp
@@ -0,0 +1,149 @@
+#include <gtest/gtest.h>
+#include <xtensor/xarray.hpp>
+#include <xtensor/xfixed.hpp>
+
+#include <samurai/samurai.hpp>
+#include <samurai/mr/mesh.hpp>
+#include <samurai/interval.hpp>
+#include <samurai/field.hpp>
+#include <samurai/hdf5.hpp>
+
+#ifdef SAMURAI_WITH_MPI
+#include <samurai/load_balancing.hpp>
+#endif
+
+#include <vector>
+
+namespace samurai {
+
+#ifdef SAMURAI_WITH_MPI
+    /*
+    * test cmptLoad;
+    */
+    TEST(loadBalance, cmptLoad){
+
+        constexpr int dim = 2;
+
+        using Config = samurai::MRConfig<dim>;
+        using Mesh_t = samurai::MRMesh<Config>;
+
+        samurai::CellList<dim> cl;
+        cl[0][{0}].add_interval({0, 4});
+        cl[0][{1}].add_interval({0, 1});
+        cl[0][{1}].add_interval({3, 4});
+        cl[1][{2}].add_interval({2, 6});
+        cl[2][{6}].add_interval({4, 6});
+        cl[2][{6}].add_interval({10, 12});
+
+        Mesh_t mesh( cl, 0, 2 );
+
+        auto ncells = cmptLoad<BalanceElement_t::CELL>( mesh );
+        auto ninter = cmptLoad<BalanceElement_t::INTERVAL>( mesh );
+
+        ASSERT_EQ( ncells, 14 );
+        ASSERT_EQ( ninter,  6 );
+
+    }
+
+    /**
+     * Be aware that the cmptInterface require 2:1 balance for this test
+    */
+    TEST(loadBalance, cmptInterface){
+        constexpr int dim = 2;
+        
+        using Config = samurai::MRConfig<dim>;
+        using Mesh_t = samurai::MRMesh<Config>;
+
+        samurai::CellList<dim> cl_a, cl_b;
+
+        {
+            cl_a[0][{0}].add_interval({0, 4});   // level 0 
+            
+            cl_a[1][{2}].add_interval({0, 2});   // level 1 
+            cl_a[1][{3}].add_interval({0, 2});   // level 1
+            cl_a[1][{2}].add_interval({6, 8});   // level 1 
+            cl_a[1][{3}].add_interval({6, 8});   // level 1
+
+            cl_a[1][{2}].add_interval({2, 6});   // level 1
+            cl_a[2][{6}].add_interval({4, 6});   // level 2
+            cl_a[2][{6}].add_interval({10, 12}); // level 2 
+        }
+
+        {
+            // cl_b[0][{2}].add_interval({0, 4});   // level 0 
+            cl_b[0][{3}].add_interval({0, 4});   // level 0
+            
+            cl_b[1][{4}].add_interval({0, 8});   // level 1 
+            cl_b[1][{5}].add_interval({0, 8});   // level 1
+
+            cl_b[1][{3}].add_interval({3, 5});   // level 1
+            cl_b[2][{7}].add_interval({4, 6});   // level 2
+            cl_b[2][{7}].add_interval({10, 12}); // level 2 
+        }
+
+        Mesh_t mesh_a ( cl_a, 0, 2 ), mesh_b (cl_b, 0, 2 );
+
+        auto interface = cmptInterface<dim, Direction_t::FACE, 1>( mesh_a, mesh_b );
+
+        ASSERT_EQ( interface.nb_cells(), 10 );
+
+    }
+
+    TEST(loadBalance, cellExists){
+        constexpr int dim = 2;
+        
+        using Config = samurai::MRConfig<dim, 1>;
+        using Mesh_t = samurai::MRMesh<Config>;
+
+        samurai::CellList<dim> cl_a, cl_b;
+
+        {
+            cl_a[0][{0}].add_interval({0, 4});   // level 0 
+            
+            cl_a[1][{2}].add_interval({0, 2});   // level 1 
+            cl_a[1][{3}].add_interval({0, 2});   // level 1
+            cl_a[1][{2}].add_interval({6, 8});   // level 1 
+            cl_a[1][{3}].add_interval({6, 8});   // level 1
+
+            cl_a[1][{2}].add_interval({2, 6});   // level 1
+            cl_a[2][{6}].add_interval({4, 6});   // level 2
+            cl_a[2][{6}].add_interval({10, 12}); // level 2 
+        }
+
+        Mesh_t mesh ( cl_a, 0, 2 );
+
+        {
+            xt::xtensor_fixed<int, xt::xshape<Mesh_t::dim>> ij = {{3, 2}};
+            ASSERT_TRUE( samurai::cellExists( mesh, Mesh_t::mesh_id_t::cells, static_cast<size_t>(1), ij ) );
+        }
+
+        {
+            xt::xtensor_fixed<int, xt::xshape<Mesh_t::dim>> ij = {{0, 1}};
+            ASSERT_FALSE( samurai::cellExists( mesh, Mesh_t::mesh_id_t::cells, static_cast<size_t>(0), ij ) );
+        }
+
+        {
+            xt::xtensor_fixed<int, xt::xshape<Mesh_t::dim>> ij = {{0, 2}};
+            ASSERT_TRUE( samurai::cellExists( mesh, Mesh_t::mesh_id_t::reference, static_cast<size_t>(0), ij ) );
+        }
+
+        auto coords      = make_field<int, dim, false>("coordinates", mesh);
+        auto level_field = make_field<std::size_t, 1, false>("level", mesh);
+        for_each_cell(mesh[Mesh_t::mesh_id_t::reference], [&](auto& cell) {
+                            if constexpr ( dim == 1 )
+                            {
+                                coords[cell] = cell.indices[0];
+                            }
+                            else
+                            {
+                                coords[cell] = cell.indices;
+                            }
+                            level_field[cell] = cell.level;
+                        });
+
+        samurai::save( "./", "test-lb-cellExists", {true, true}, mesh, coords, level_field );
+
+    }
+
+#endif
+}
diff --git a/tests/test_mrmesh.cpp b/tests/test_mrmesh.cpp
new file mode 100644
index 000000000..62f0f427f
--- /dev/null
+++ b/tests/test_mrmesh.cpp
@@ -0,0 +1,245 @@
+#include <gtest/gtest.h>
+
+#include <samurai/hdf5.hpp>
+#include <samurai/mr/mesh.hpp>
+#include <samurai/samurai.hpp>
+#include <samurai/subset/subset_op.hpp>
+
+#include <vector>
+
+namespace samurai
+{
+
+    template <int dim>
+    inline samurai::CellList<dim> getInitState()
+    {
+        assert(dim == 2);
+
+        samurai::CellList<dim> cl;
+        cl[0][{0}].add_interval({0, 4});
+        cl[0][{1}].add_interval({0, 1});
+        cl[0][{1}].add_interval({3, 4});
+        cl[0][{2}].add_interval({0, 1});
+        cl[0][{2}].add_interval({3, 4});
+        cl[0][{3}].add_interval({0, 3});
+
+        cl[1][{2}].add_interval({2, 6});
+        cl[1][{3}].add_interval({2, 6});
+        cl[1][{4}].add_interval({2, 4});
+        cl[1][{4}].add_interval({5, 6});
+        cl[1][{5}].add_interval({2, 6});
+        cl[1][{6}].add_interval({6, 8});
+        cl[1][{7}].add_interval({6, 7});
+
+        cl[2][{8}].add_interval({8, 10});
+        cl[2][{9}].add_interval({8, 10});
+        cl[2][{14}].add_interval({14, 16});
+        cl[2][{15}].add_interval({14, 16});
+
+        return cl;
+    }
+
+    /*
+     * test MR Mesh;
+     */
+    TEST(mrmesh, test_nbcells2D)
+    {
+        constexpr int dim = 2;
+
+        using Config = samurai::MRConfig<dim>;
+        using Mesh_t = samurai::MRMesh<Config>;
+
+        Mesh_t mesh(getInitState<dim>(), 0, 2);
+
+        samurai::save("./", "test_mrmesh_cells_and_ghosts", mesh);
+
+        ASSERT_EQ(mesh.min_level(), 0);
+        ASSERT_EQ(mesh.max_level(), 2);
+
+        /**
+         * Need fix, this does not work.
+         */
+        // std::vector<size_t> nCellPerLevel_withGhost = {36, 48, 32}; // including ghost
+        // for (size_t ilvl = mesh.min_level(); ilvl <= mesh.max_level(); ++ilvl)
+        // {
+        //     std::cerr << "cells_and_ghosts [" << ilvl << "]: " << mesh.nb_cells(ilvl, samurai::MRMeshId::cells_and_ghosts) << std::endl;
+        //     // ASSERT_EQ(mesh.nb_cells(ilvl), nCellPerLevel_withGhost[ilvl]);
+        // }
+
+        std::vector<size_t> nCellPerLevel_proj = {5, 2, 0}; // proj cells
+        for (size_t ilvl = mesh.min_level(); ilvl <= mesh.max_level(); ++ilvl)
+        {
+            // std::cerr << "proj_cells [" << ilvl << "]: " << mesh.nb_cells(ilvl, samurai::MRMeshId::proj_cells) << std::endl;
+            ASSERT_EQ(mesh.nb_cells(ilvl, samurai::MRMeshId::proj_cells), nCellPerLevel_proj[ilvl]);
+        }
+
+        std::vector<size_t> nCellPerLevel_leaves = {11, 18, 8}; // not including ghost
+        for (size_t ilvl = mesh.min_level(); ilvl <= mesh.max_level(); ++ilvl)
+        {
+            ASSERT_EQ(mesh.nb_cells(ilvl, samurai::MRMeshId::cells), nCellPerLevel_leaves[ilvl]);
+        }
+    }
+
+    TEST(mrmesh, test_exist2D)
+    {
+        constexpr int dim = 2;
+
+        using Config     = samurai::MRConfig<dim>;
+        using Mesh_t     = samurai::MRMesh<Config>;
+        using Interval_t = typename Mesh_t::interval_t;
+
+        Mesh_t mesh(getInitState<dim>(), 0, 2);
+
+        Interval_t i{0, 3, 0};
+
+        // const auto val = mesh.exists(samurai::MRMeshId::cells, 1, i);
+    }
+
+    TEST(mrmesh, test_merge)
+    {
+        constexpr int dim = 2;
+
+        using Config      = samurai::MRConfig<dim>;
+        using Mesh_t      = samurai::MRMesh<Config>;
+        using CellArray_t = Mesh_t::ca_type;
+
+        samurai::CellList<dim> cl;
+        cl[0][{0}].add_interval({0, 4});
+        cl[0][{1}].add_interval({0, 1});
+        cl[0][{1}].add_interval({3, 4});
+        cl[1][{2}].add_interval({2, 6});
+
+        Mesh_t mesh(cl, 0, 2);
+        ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 10);
+
+        samurai::CellList<dim> to_add_cl;
+        to_add_cl[1][{3}].add_interval({2, 6});
+
+        CellArray_t to_add_ca = {to_add_cl, false};
+
+        mesh.merge(to_add_ca);
+
+        // technically not sufficient to be sure the merge was done properly
+        ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 14);
+    }
+
+    TEST(mrmesh, test_remove)
+    {
+        constexpr int dim = 2;
+
+        using Config      = samurai::MRConfig<dim>;
+        using Mesh_t      = samurai::MRMesh<Config>;
+        using CellArray_t = Mesh_t::ca_type;
+
+        samurai::CellList<dim> cl;
+        cl[0][{0}].add_interval({0, 4});
+        cl[0][{1}].add_interval({0, 1});
+        cl[0][{1}].add_interval({3, 4});
+        cl[1][{2}].add_interval({2, 6});
+        cl[1][{3}].add_interval({2, 6});
+
+        Mesh_t mesh(cl, 0, 2);
+        ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 14);
+
+        samurai::CellList<dim> to_rm_cl;
+        to_rm_cl[0][{1}].add_interval({3, 4});
+        to_rm_cl[1][{3}].add_interval({2, 6});
+
+        CellArray_t to_rm_ca = {to_rm_cl, false};
+
+        mesh.remove(to_rm_ca);
+
+        // technically not sufficient to be sure the remove was done properly
+        ASSERT_EQ(mesh.nb_cells(Mesh_t::mesh_id_t::cells), 9);
+    }
+
+    TEST(mrmesh, test_construct_subdomain)
+    {
+        constexpr int dim = 2;
+
+        using Config     = samurai::MRConfig<dim>;
+        using Mesh_t     = samurai::MRMesh<Config>;
+
+        samurai::CellList<dim> cl;
+        cl[0][{0}].add_interval({0, 4});
+        cl[0][{1}].add_interval({0, 1});
+        cl[0][{1}].add_interval({3, 4});
+        cl[0][{2}].add_interval({0, 1});
+        cl[0][{2}].add_interval({3, 4});
+
+        cl[1][{2}].add_interval({2, 6});
+        cl[1][{3}].add_interval({2, 6});
+        cl[1][{4}].add_interval({2, 4});
+        cl[1][{4}].add_interval({5, 6});
+        cl[1][{5}].add_interval({2, 6});
+        cl[1][{6}].add_interval({6, 8});
+        cl[1][{7}].add_interval({6, 7});
+
+        Mesh_t mesh( cl, 0, 1);
+
+        bool intersect_found = false;
+        const auto & subdom = mesh.subdomain();
+        for(std::size_t level=mesh.min_level(); level<mesh.max_level(); ++level ){
+            auto intersect = difference( mesh[Mesh_t::mesh_id_t::cells][level], subdom ).on(level);
+            size_t ninter = 0;
+            intersect( [&]([[maybe_unused]]const auto & interval, [[maybe_unused]]const auto & index ){
+                ninter += 1;
+            });
+
+            if( ninter >  0) { intersect_found = true; }
+        }
+
+        ASSERT_FALSE( intersect_found );
+
+    }
+
+    TEST(mrmesh, test_construct_union)
+    {
+        constexpr int dim = 2;
+
+        using Config = samurai::MRConfig<dim>;
+        using Mesh_t = samurai::MRMesh<Config>;
+        using cl_t   = Mesh_t::cl_type;
+        using ca_t   = Mesh_t::ca_type;
+
+        Mesh_t mesh( getInitState<dim>(), 0, 2);
+
+        auto un = mesh.get_union();
+
+        ASSERT_EQ( un[ 0 ].nb_cells(), 5);
+        {
+            cl_t cl;
+            cl[0][{1}].add_interval({1,3});
+            cl[0][{2}].add_interval({1,3});
+            cl[0][{3}].add_interval({3,4});
+            ca_t ca = { cl, false };
+            auto tmp_ = samurai::difference( ca[0], un[0] );
+            
+            size_t ni = 0;
+            tmp_( [&]([[maybe_unused]]const auto & interval, [[maybe_unused]]const auto & index){
+                ni +=1;
+            });
+            ASSERT_EQ( ni, 0);
+        }
+
+        ASSERT_EQ( un[ 1 ].nb_cells(), 2);
+        {
+            cl_t cl;
+            cl[1][{4}].add_interval({4,5});
+            cl[1][{7}].add_interval({7,8});
+            ca_t ca = { cl, false };
+            auto tmp_ = samurai::difference( ca[1], un[1] );
+            
+            size_t ni = 0;
+            tmp_( [&]([[maybe_unused]]const auto & interval, [[maybe_unused]]const auto & index){
+                ni +=1;
+            });
+            ASSERT_EQ( ni, 0);
+        }
+
+        ASSERT_EQ( un[ 2 ].nb_cells(), 0);
+
+
+    }
+
+}
\ No newline at end of file
diff --git a/tests/test_sfc.cpp b/tests/test_sfc.cpp
new file mode 100644
index 000000000..e72ce4bb4
--- /dev/null
+++ b/tests/test_sfc.cpp
@@ -0,0 +1,190 @@
+#include <gtest/gtest.h>
+#include <xtensor/xarray.hpp>
+#include <xtensor/xfixed.hpp>
+
+#include <samurai/hilbert.hpp>
+#include <samurai/morton.hpp>
+
+#include <vector>
+
+namespace samurai
+{
+    using Coord_2D_t = xt::xtensor_fixed<uint32_t, xt::xshape<2>>;
+    using Coord_3D_t = xt::xtensor_fixed<uint32_t, xt::xshape<3>>;
+
+    /*
+     * test computation of morton indexes 2D
+     */
+    TEST(sfc, morton2D_getKey)
+    {
+        constexpr int dim = 2;
+
+        SFC<Morton> morton;
+
+        Coord_2D_t ij = {0, 0};
+        auto key1     = morton.getKey<dim>(ij);
+        ASSERT_EQ(key1, 0);
+
+        ij        = {2, 1};
+        auto key2 = morton.getKey<dim>(ij);
+        ASSERT_EQ(key2, 6);
+
+        ij        = {5, 6};
+        auto key3 = morton.getKey<dim>(ij);
+        ASSERT_EQ(key3, 57);
+
+        ij        = {0, 4};
+        auto key4 = morton.getKey<dim>(ij);
+        ASSERT_EQ(key4, 32);
+
+        ij        = {4, 0};
+        auto key5 = morton.getKey<dim>(ij);
+        ASSERT_EQ(key5, 16);
+    }
+
+    /*
+     * test computation of (i,j) logical coordinate from morton indexes 2D
+     */
+    TEST(sfc, morton2D_getCoordinates)
+    {
+        constexpr int dim = 2;
+
+        SFC<Morton> morton;
+
+        auto ij = morton.getCoordinates<dim>(static_cast<SFC_key_t>(0));
+        ASSERT_EQ(ij(0), 0);
+        ASSERT_EQ(ij(1), 0);
+
+        auto ij2 = morton.getCoordinates<dim>(static_cast<SFC_key_t>(31));
+        ASSERT_EQ(ij2(0), 7);
+        ASSERT_EQ(ij2(1), 3);
+
+        auto ij3 = morton.getCoordinates<dim>(static_cast<SFC_key_t>(51));
+        ASSERT_EQ(ij3(0), 5);
+        ASSERT_EQ(ij3(1), 5);
+
+        auto ij4 = morton.getCoordinates<dim>(static_cast<SFC_key_t>(39));
+        ASSERT_EQ(ij4(0), 3);
+        ASSERT_EQ(ij4(1), 5);
+    }
+
+    /*
+     * test computation of morton indexes 3D
+     */
+    TEST(sfc, morton3D_getKey)
+    {
+        constexpr int dim = 3;
+
+        SFC<Morton> morton;
+
+        Coord_3D_t ijk = {0, 0, 0};
+        auto key1      = morton.getKey<dim>(ijk);
+        ASSERT_EQ(key1, 0);
+
+        ijk       = {1, 1, 0};
+        auto key2 = morton.getKey<dim>(ijk);
+        ASSERT_EQ(key2, 3);
+
+        ijk       = {0, 0, 1};
+        auto key3 = morton.getKey<dim>(ijk);
+        ASSERT_EQ(key3, 4);
+
+        ijk       = {5, 9, 1};
+        auto key4 = morton.getKey<dim>(ijk);
+        ASSERT_EQ(key4, 1095);
+    }
+
+    /*
+     * test computation of (i,j,k) logical coordinates from morton indexes 3D
+     */
+    TEST(sfc, morton3D_getCoordinates)
+    {
+        constexpr int dim = 3;
+
+        SFC<Morton> morton;
+
+        auto ijk = morton.getCoordinates<dim>(static_cast<SFC_key_t>(1095));
+        ASSERT_EQ(ijk(0), 5);
+        ASSERT_EQ(ijk(1), 9);
+        ASSERT_EQ(ijk(2), 1);
+    }
+
+    /*
+     * test computation of hilbert key for 2D
+     */
+    TEST(sfc, hilbert2D_getKey)
+    {
+        constexpr int dim = 2;
+
+        SFC<Hilbert> hilbert;
+
+        Coord_2D_t ij = {0, 0};
+        ASSERT_EQ(hilbert.getKey<dim>(ij), static_cast<SFC_key_t>(0));
+
+        ij = {1, 1};
+        ASSERT_EQ(hilbert.getKey<dim>(ij), static_cast<SFC_key_t>(2));
+
+        ij = {1, 2};
+        ASSERT_EQ(hilbert.getKey<dim>(ij), static_cast<SFC_key_t>(7));
+
+        ij = {3, 1};
+        ASSERT_EQ(hilbert.getKey<dim>(ij), static_cast<SFC_key_t>(12));
+
+        ij = {3, 4};
+        ASSERT_EQ(hilbert.getKey<dim>(ij), static_cast<SFC_key_t>(53));
+
+        ij = {3, 5};
+        ASSERT_EQ(hilbert.getKey<dim>(ij), static_cast<SFC_key_t>(52));
+
+        ij = {3, 6};
+        ASSERT_EQ(hilbert.getKey<dim>(ij), static_cast<SFC_key_t>(51));
+    }
+
+    /*
+     * test computation of hilbert key & back to {i,j,k}
+     */
+    // TEST(tests_hilbert, test_consistency_hilbert3D){
+
+    //     {
+    //         Logical_Pos_t pt1 {0, 0 ,0};
+    //         auto hk1 = getHilbertKey3D<uint64_t>( pt1, 0 );
+    //         auto pt1_r = getLogicalFromHilberKey3D( hk1, 0 );
+    //         ASSERT_EQ( pt1.i, pt1_r.i );
+    //         ASSERT_EQ( pt1.j, pt1_r.j );
+    //         ASSERT_EQ( pt1.k, pt1_r.k );
+    //     }
+
+    //     {
+    //         Logical_Pos_t pt1 {2, 3, 1};
+    //         auto hk1 = getHilbertKey3D<uint64_t>( pt1, 2 );
+    //         auto pt1_r = getLogicalFromHilberKey3D( hk1, 2 );
+    //         ASSERT_EQ( pt1.i, pt1_r.i );
+    //         ASSERT_EQ( pt1.j, pt1_r.j );
+    //         ASSERT_EQ( pt1.k, pt1_r.k );
+    //     }
+
+    //     {
+    //         Logical_Pos_t pt1 {8, 14, 28};
+    //         auto hk1 = getHilbertKey3D<uint64_t>( pt1, 6 );
+    //         auto pt1_r = getLogicalFromHilberKey3D( hk1, 6 );
+    //         ASSERT_EQ( pt1.i, pt1_r.i );
+    //         ASSERT_EQ( pt1.j, pt1_r.j );
+    //         ASSERT_EQ( pt1.k, pt1_r.k );
+    //     }
+
+    // }
+
+    /*
+     * test computation of morton indexes 3D
+     */
+    // TEST(tests_hilbert, test_consistency_hilbert3D_old){
+
+    // {
+    //     auto hk1 = get_hilbert_key_3D( {0, 0 ,0}, 0 );
+    //     auto pt1_r = get_logical_from_hilbert_3D( hk1, 0, 3);
+    //     ASSERT_EQ( 0, pt1_r[0] );
+    //     ASSERT_EQ( 0, pt1_r[1] );
+    //     ASSERT_EQ( 0, pt1_r[2] );
+    // }
+
+}
\ No newline at end of file