From 25243a9f04730c9710936d63cc9ccce7fc760af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 31 Jul 2024 10:36:05 +0200 Subject: [PATCH 001/101] added count to mhp algorithms --- include/dr/mhp.hpp | 1 + include/dr/mhp/algorithms/count.hpp | 162 ++++++++++++++++++++++++++++ test/gtest/common/count.cpp | 38 +++++++ 3 files changed, 201 insertions(+) create mode 100644 include/dr/mhp/algorithms/count.hpp create mode 100644 test/gtest/common/count.cpp diff --git a/include/dr/mhp.hpp b/include/dr/mhp.hpp index b13d5007ef..8bad2caeee 100644 --- a/include/dr/mhp.hpp +++ b/include/dr/mhp.hpp @@ -75,5 +75,6 @@ #include #include #include +#include #include #include diff --git a/include/dr/mhp/algorithms/count.hpp b/include/dr/mhp/algorithms/count.hpp new file mode 100644 index 0000000000..3ec838df76 --- /dev/null +++ b/include/dr/mhp/algorithms/count.hpp @@ -0,0 +1,162 @@ +// SPDX-FileCopyrightText: Intel Corporation +// +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +namespace dr::mhp::__detail { + +inline auto add_counts(rng::forward_range auto &&r) { + rng::range_difference_t zero{}; + + return std::accumulate(rng::begin(r), rng::end(r), zero); +} + +inline auto std_count_if(rng::forward_range auto &&r, auto &&pred) { + using count_type = rng::range_difference_t; + + if (rng::empty(r)) { + return count_type{}; + } + + return std::count_if(std::execution::par_unseq, + dr::__detail::direct_iterator(rng::begin(r)), + dr::__detail::direct_iterator(rng::end(r)), + pred); +} + +inline auto dpl_count_if(rng::forward_range auto &&r, auto &&pred) { + using count_type = rng::range_difference_t; + +#ifdef SYCL_LANGUAGE_VERSION + if (rng::empty(r)) { + return count_type{}; + } + + return std::count_if(dpl_policy(), + dr::__detail::direct_iterator(rng::begin(r)), + dr::__detail::direct_iterator(rng::end(r)), + pred); +#else + assert(false); + return count_type{}; +#endif +} + +template +auto count_if(std::size_t root, bool root_provided, 
DR &&dr, auto &&pred) { + using count_type = rng::range_difference_t; + auto comm = default_comm(); + + if (rng::empty(dr)) { + return count_type{}; + } + + if (aligned(dr)) { + dr::drlog.debug("Parallel count\n"); + + // Count within the local segments + auto count = [=](auto &&r) { + assert(rng::size(r) > 0); + if (mhp::use_sycl()) { + dr::drlog.debug(" with DPL\n"); + return dpl_count_if(r, pred); + } else { + dr::drlog.debug(" with CPU\n"); + return std_count_if(r, pred); + } + }; + + auto locals = rng::views::transform(local_segments(dr), count); + auto local = add_counts(locals); + + std::vector all(comm.size()); + if (root_provided) { + // Everyone gathers to root, only root adds up the counts + comm.gather(local, std::span{all}, root); + if (root == comm.rank()) { + return add_counts(all); + } else { + return count_type{}; + } + } else { + // Everyone gathers and everyone adds up the counts + comm.all_gather(local, all); + return add_counts(all); + } + } else { + dr::drlog.debug("Serial count\n"); + count_type result{}; + if (!root_provided || root == comm.rank()) { + result = add_counts(dr); + } + barrier(); + return result; + } +} + +} // namespace dr::mhp::__detail + +namespace dr::mhp { + +// +// Ranges +// + +// range, elem, w/wo root + +template +auto count(std::size_t root, DR &&dr, const T& value) { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(root, true, dr, pred); +} + +template +auto count(DR &&dr, const T& value) { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(0, false, dr, pred); +} + +// range, predicate, w/wo root + +template +auto count_if(std::size_t root, DR &&dr, auto &&pred) { + return __detail::count_if(root, true, dr, pred); +} + +template +auto count_if(DR &&dr, auto &&pred) { + return __detail::count_if(0, false, dr, pred); +} + +// +// Iterators +// + +// range, elem, w/wo root + +template +auto count(std::size_t root, DI first, DI last, const T& value) { + auto 
pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(root, true, rng::subrange(first, last), pred); +} + +template +auto count(DI first, DI last, const T& value) { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(0, false, rng::subrange(first, last), pred); +} + +// range, predicate, w/wo root + +template +auto count_if(std::size_t root, DI first, DI last, auto &&pred) { + return __detail::count_if(root, true, rng::subrange(first, last), pred); +} + +template +auto count_if(DI first, DI last, auto &&pred) { + return __detail::count_if(0, false, rng::subrange(first, last), pred); +} + +}; // namespace dr::mhp diff --git a/test/gtest/common/count.cpp b/test/gtest/common/count.cpp new file mode 100644 index 0000000000..371f33f8c9 --- /dev/null +++ b/test/gtest/common/count.cpp @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Intel Corporation +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "xhp-tests.hpp" + +// Fixture +template class Count : public testing::Test { +protected: +}; + +TYPED_TEST_SUITE(Count, AllTypes); + +TYPED_TEST(Count, BasicFirstElem) { + Ops1 ops(10); + auto value = *ops.vec.begin(); + + EXPECT_EQ(std::count(ops.vec.begin(), ops.vec.end(), value), + xhp::count(ops.dist_vec, value)); +} + +TYPED_TEST(Count, BasicFirstElemIf) { + Ops1 ops(10); + auto value = *ops.vec.begin(); + auto pred = [=](auto &&v) { v == value; } + + EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), + xhp::count_if(ops.dist_vec, pred)); +} + +TYPED_TEST(Count, FirstElemsIf) { + Ops1 ops(10); + auto value = *ops.vec.begin(); + auto pred = [=](auto &&v) { v < 5; } + + EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), + xhp::count_if(ops.dist_vec, pred)); +} From 7eec868759d304352e7b79052ae8b92d81733a02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 31 Jul 2024 10:44:27 +0200 Subject: [PATCH 002/101] minor fix --- include/dr/mp.hpp | 30 
+----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/include/dr/mp.hpp b/include/dr/mp.hpp index 4fa7001089..f9598bbcd8 100644 --- a/include/dr/mp.hpp +++ b/include/dr/mp.hpp @@ -52,34 +52,6 @@ #include #include -<<<<<<< HEAD:include/dr/mhp.hpp -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -======= #include #include #include @@ -93,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -106,4 +79,3 @@ #include #include #include ->>>>>>> a9468e93d71f48ca7b977472fd01d63965c21d75:include/dr/mp.hpp From 6090fdcb1dc15e45dd9b0638412fd060b8ad7dbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 31 Jul 2024 11:15:57 +0200 Subject: [PATCH 003/101] minor fixes --- include/dr/mp/algorithms/count.hpp | 63 ++++++++++++++---------------- test/gtest/common/count.cpp | 13 +++--- test/gtest/mp/CMakeLists.txt | 3 +- 3 files changed, 38 insertions(+), 41 deletions(-) diff --git a/include/dr/mp/algorithms/count.hpp b/include/dr/mp/algorithms/count.hpp index 3ec838df76..75d438d8f0 100644 --- a/include/dr/mp/algorithms/count.hpp +++ b/include/dr/mp/algorithms/count.hpp @@ -4,11 +4,11 @@ #pragma once -namespace dr::mhp::__detail { +namespace dr::mp::__detail { inline auto add_counts(rng::forward_range auto &&r) { rng::range_difference_t zero{}; - + return std::accumulate(rng::begin(r), rng::end(r), zero); } @@ -19,10 +19,9 @@ inline auto std_count_if(rng::forward_range auto &&r, auto &&pred) { return count_type{}; } - return std::count_if(std::execution::par_unseq, + return std::count_if(std::execution::par_unseq, dr::__detail::direct_iterator(rng::begin(r)), - dr::__detail::direct_iterator(rng::end(r)), - pred); + dr::__detail::direct_iterator(rng::end(r)), pred); } inline auto 
dpl_count_if(rng::forward_range auto &&r, auto &&pred) { @@ -33,10 +32,9 @@ inline auto dpl_count_if(rng::forward_range auto &&r, auto &&pred) { return count_type{}; } - return std::count_if(dpl_policy(), + return std::count_if(mp::dpl_policy(), dr::__detail::direct_iterator(rng::begin(r)), - dr::__detail::direct_iterator(rng::end(r)), - pred); + dr::__detail::direct_iterator(rng::end(r)), pred); #else assert(false); return count_type{}; @@ -46,7 +44,7 @@ inline auto dpl_count_if(rng::forward_range auto &&r, auto &&pred) { template auto count_if(std::size_t root, bool root_provided, DR &&dr, auto &&pred) { using count_type = rng::range_difference_t; - auto comm = default_comm(); + auto comm = mp::default_comm(); if (rng::empty(dr)) { return count_type{}; @@ -58,7 +56,7 @@ auto count_if(std::size_t root, bool root_provided, DR &&dr, auto &&pred) { // Count within the local segments auto count = [=](auto &&r) { assert(rng::size(r) > 0); - if (mhp::use_sycl()) { + if (mp::use_sycl()) { dr::drlog.debug(" with DPL\n"); return dpl_count_if(r, pred); } else { @@ -90,14 +88,14 @@ auto count_if(std::size_t root, bool root_provided, DR &&dr, auto &&pred) { if (!root_provided || root == comm.rank()) { result = add_counts(dr); } - barrier(); + mp::barrier(); return result; } } - -} // namespace dr::mhp::__detail -namespace dr::mhp { +} // namespace dr::mp::__detail + +namespace dr::mp { // // Ranges @@ -106,27 +104,26 @@ namespace dr::mhp { // range, elem, w/wo root template -auto count(std::size_t root, DR &&dr, const T& value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(root, true, dr, pred); +auto count(std::size_t root, DR &&dr, const T &value) { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(root, true, dr, pred); } template -auto count(DR &&dr, const T& value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(0, false, dr, pred); +auto count(DR &&dr, const T &value) { + 
auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(0, false, dr, pred); } // range, predicate, w/wo root template auto count_if(std::size_t root, DR &&dr, auto &&pred) { - return __detail::count_if(root, true, dr, pred); + return __detail::count_if(root, true, dr, pred); } -template -auto count_if(DR &&dr, auto &&pred) { - return __detail::count_if(0, false, dr, pred); +template auto count_if(DR &&dr, auto &&pred) { + return __detail::count_if(0, false, dr, pred); } // @@ -136,27 +133,27 @@ auto count_if(DR &&dr, auto &&pred) { // range, elem, w/wo root template -auto count(std::size_t root, DI first, DI last, const T& value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(root, true, rng::subrange(first, last), pred); +auto count(std::size_t root, DI first, DI last, const T &value) { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(root, true, rng::subrange(first, last), pred); } template -auto count(DI first, DI last, const T& value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(0, false, rng::subrange(first, last), pred); +auto count(DI first, DI last, const T &value) { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(0, false, rng::subrange(first, last), pred); } // range, predicate, w/wo root template auto count_if(std::size_t root, DI first, DI last, auto &&pred) { - return __detail::count_if(root, true, rng::subrange(first, last), pred); + return __detail::count_if(root, true, rng::subrange(first, last), pred); } template auto count_if(DI first, DI last, auto &&pred) { - return __detail::count_if(0, false, rng::subrange(first, last), pred); + return __detail::count_if(0, false, rng::subrange(first, last), pred); } -}; // namespace dr::mhp +}; // namespace dr::mp diff --git a/test/gtest/common/count.cpp b/test/gtest/common/count.cpp index 371f33f8c9..6727c0bae3 100644 --- a/test/gtest/common/count.cpp 
+++ b/test/gtest/common/count.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: BSD-3-Clause -#include "xhp-tests.hpp" +#include "xp-tests.hpp" // Fixture template class Count : public testing::Test { @@ -16,23 +16,22 @@ TYPED_TEST(Count, BasicFirstElem) { auto value = *ops.vec.begin(); EXPECT_EQ(std::count(ops.vec.begin(), ops.vec.end(), value), - xhp::count(ops.dist_vec, value)); + xp::count(ops.dist_vec, value)); } TYPED_TEST(Count, BasicFirstElemIf) { Ops1 ops(10); auto value = *ops.vec.begin(); - auto pred = [=](auto &&v) { v == value; } + auto pred = [=](auto &&v) { return v == value; }; EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), - xhp::count_if(ops.dist_vec, pred)); + xp::count_if(ops.dist_vec, pred)); } TYPED_TEST(Count, FirstElemsIf) { Ops1 ops(10); - auto value = *ops.vec.begin(); - auto pred = [=](auto &&v) { v < 5; } + auto pred = [=](auto &&v) { return v < 5; }; EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), - xhp::count_if(ops.dist_vec, pred)); + xp::count_if(ops.dist_vec, pred)); } diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index cef65af431..8cefdecc48 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -11,6 +11,7 @@ add_executable( mp-tests.cpp ../common/all.cpp ../common/copy.cpp + ../common/count.cpp ../common/counted.cpp ../common/distributed_vector.cpp ../common/drop.cpp @@ -57,7 +58,7 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - ../common/equal.cpp + ../common/count.cpp ) # cmake-format: on From 167702dc3003a51ca82be38df7aebb7ec35b5c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 2 Aug 2024 19:42:45 +0200 Subject: [PATCH 004/101] code review fixes --- include/dr/mp/algorithms/count.hpp | 140 ++++++++++++----------------- test/gtest/common/count.cpp | 18 +++- 2 files changed, 73 insertions(+), 85 deletions(-) diff --git a/include/dr/mp/algorithms/count.hpp b/include/dr/mp/algorithms/count.hpp index 
75d438d8f0..e5f606bcd4 100644 --- a/include/dr/mp/algorithms/count.hpp +++ b/include/dr/mp/algorithms/count.hpp @@ -12,33 +12,23 @@ inline auto add_counts(rng::forward_range auto &&r) { return std::accumulate(rng::begin(r), rng::end(r), zero); } -inline auto std_count_if(rng::forward_range auto &&r, auto &&pred) { - using count_type = rng::range_difference_t; - - if (rng::empty(r)) { - return count_type{}; - } - - return std::count_if(std::execution::par_unseq, - dr::__detail::direct_iterator(rng::begin(r)), - dr::__detail::direct_iterator(rng::end(r)), pred); -} - -inline auto dpl_count_if(rng::forward_range auto &&r, auto &&pred) { - using count_type = rng::range_difference_t; - +inline auto count_if_local(rng::forward_range auto &&r, auto &&pred) { + if (mp::use_sycl()) { + dr::drlog.debug(" with DPL\n"); #ifdef SYCL_LANGUAGE_VERSION - if (rng::empty(r)) { - return count_type{}; - } - - return std::count_if(mp::dpl_policy(), - dr::__detail::direct_iterator(rng::begin(r)), - dr::__detail::direct_iterator(rng::end(r)), pred); + return std::count_if(mp::dpl_policy(), + dr::__detail::direct_iterator(rng::begin(r)), + dr::__detail::direct_iterator(rng::end(r)), pred); #else - assert(false); - return count_type{}; + assert(false); + return rng::range_difference_t{}; #endif + } else { + dr::drlog.debug(" with CPU\n"); + return std::count_if(std::execution::par_unseq, + dr::__detail::direct_iterator(rng::begin(r)), + dr::__detail::direct_iterator(rng::end(r)), pred); + } } template @@ -56,15 +46,8 @@ auto count_if(std::size_t root, bool root_provided, DR &&dr, auto &&pred) { // Count within the local segments auto count = [=](auto &&r) { assert(rng::size(r) > 0); - if (mp::use_sycl()) { - dr::drlog.debug(" with DPL\n"); - return dpl_count_if(r, pred); - } else { - dr::drlog.debug(" with CPU\n"); - return std_count_if(r, pred); - } + return count_if_local(r, pred); }; - auto locals = rng::views::transform(local_segments(dr), count); auto local = add_counts(locals); @@ 
-97,63 +80,58 @@ auto count_if(std::size_t root, bool root_provided, DR &&dr, auto &&pred) { namespace dr::mp { -// -// Ranges -// - -// range, elem, w/wo root - -template -auto count(std::size_t root, DR &&dr, const T &value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(root, true, dr, pred); -} - -template -auto count(DR &&dr, const T &value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(0, false, dr, pred); -} - -// range, predicate, w/wo root +class count_fn_ { +public: + template + auto operator()(std::size_t root, DR &&dr, const T &value) const { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(root, true, dr, pred); + } -template -auto count_if(std::size_t root, DR &&dr, auto &&pred) { - return __detail::count_if(root, true, dr, pred); -} + template + auto operator()(DR &&dr, const T &value) const { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(0, false, dr, pred); + } -template auto count_if(DR &&dr, auto &&pred) { - return __detail::count_if(0, false, dr, pred); -} + template + auto operator()(std::size_t root, DI first, DI last, const T &value) const { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(root, true, rng::subrange(first, last), pred); + } -// -// Iterators -// + template + auto operator()(DI first, DI last, const T &value) const { + auto pred = [=](auto &&v) { return v == value; }; + return __detail::count_if(0, false, rng::subrange(first, last), pred); + } +}; -// range, elem, w/wo root +inline constexpr count_fn_ count; -template -auto count(std::size_t root, DI first, DI last, const T &value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(root, true, rng::subrange(first, last), pred); -} +class count_if_fn_ { +public: + template + auto operator()(std::size_t root, DR &&dr, auto &&pred) const { + return __detail::count_if(root, true, dr, 
pred); + } -template -auto count(DI first, DI last, const T &value) { - auto pred = [=](auto &&v) { return v == value; }; - return __detail::count_if(0, false, rng::subrange(first, last), pred); -} + template + auto operator()(DR &&dr, auto &&pred) const { + return __detail::count_if(0, false, dr, pred); + } -// range, predicate, w/wo root + template + auto operator()(std::size_t root, DI first, DI last, auto &&pred) const { + return __detail::count_if(root, true, rng::subrange(first, last), pred); + } -template -auto count_if(std::size_t root, DI first, DI last, auto &&pred) { - return __detail::count_if(root, true, rng::subrange(first, last), pred); -} + template + auto operator()(DI first, DI last, auto &&pred) const { + return __detail::count_if(0, false, rng::subrange(first, last), pred); + } +}; -template -auto count_if(DI first, DI last, auto &&pred) { - return __detail::count_if(0, false, rng::subrange(first, last), pred); -} +inline constexpr count_if_fn_ count_if; }; // namespace dr::mp diff --git a/test/gtest/common/count.cpp b/test/gtest/common/count.cpp index 6727c0bae3..96feef0f0a 100644 --- a/test/gtest/common/count.cpp +++ b/test/gtest/common/count.cpp @@ -12,7 +12,12 @@ template class Count : public testing::Test { TYPED_TEST_SUITE(Count, AllTypes); TYPED_TEST(Count, BasicFirstElem) { - Ops1 ops(10); + std::vector vec { 1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7 }; + + Ops1 ops(vec.size()); + ops.vec = vec; + xp::copy(ops.vec, ops.dist_vec.begin()); + auto value = *ops.vec.begin(); EXPECT_EQ(std::count(ops.vec.begin(), ops.vec.end(), value), @@ -20,8 +25,13 @@ TYPED_TEST(Count, BasicFirstElem) { } TYPED_TEST(Count, BasicFirstElemIf) { - Ops1 ops(10); - auto value = *ops.vec.begin(); + std::vector vec { 1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7 }; + + Ops1 ops(vec.size()); + ops.vec = vec; + xp::copy(ops.vec, ops.dist_vec.begin()); + + auto value = *vec.begin(); auto pred = [=](auto &&v) { return v == value; }; EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), 
pred), @@ -29,7 +39,7 @@ TYPED_TEST(Count, BasicFirstElemIf) { } TYPED_TEST(Count, FirstElemsIf) { - Ops1 ops(10); + Ops1 ops(20); auto pred = [=](auto &&v) { return v < 5; }; EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), From 755c8965fbfde23c57824e7c5d4de2ef52feb288 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 5 Aug 2024 18:57:03 +0200 Subject: [PATCH 005/101] more code review fixes --- include/dr/mp/algorithms/count.hpp | 1 - test/gtest/common/count.cpp | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/algorithms/count.hpp b/include/dr/mp/algorithms/count.hpp index e5f606bcd4..a66c7196a7 100644 --- a/include/dr/mp/algorithms/count.hpp +++ b/include/dr/mp/algorithms/count.hpp @@ -21,7 +21,6 @@ inline auto count_if_local(rng::forward_range auto &&r, auto &&pred) { dr::__detail::direct_iterator(rng::end(r)), pred); #else assert(false); - return rng::range_difference_t{}; #endif } else { dr::drlog.debug(" with CPU\n"); diff --git a/test/gtest/common/count.cpp b/test/gtest/common/count.cpp index 96feef0f0a..0fcb3b2d92 100644 --- a/test/gtest/common/count.cpp +++ b/test/gtest/common/count.cpp @@ -11,6 +11,18 @@ template class Count : public testing::Test { TYPED_TEST_SUITE(Count, AllTypes); +TYPED_TEST(Count, EmptyIf) { + std::vector vec; + + Ops1 ops(0); + + auto pred = [=](auto &&v) { return true; }; + + EXPECT_EQ(xp::count_if(ops.dist_vec, pred), 0); + EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), + xp::count_if(ops.dist_vec, pred)); +} + TYPED_TEST(Count, BasicFirstElem) { std::vector vec { 1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7 }; @@ -20,6 +32,7 @@ TYPED_TEST(Count, BasicFirstElem) { auto value = *ops.vec.begin(); + EXPECT_EQ(xp::count(ops.dist_vec, value), 4); EXPECT_EQ(std::count(ops.vec.begin(), ops.vec.end(), value), xp::count(ops.dist_vec, value)); } @@ -34,14 +47,22 @@ TYPED_TEST(Count, BasicFirstElemIf) { auto value = *vec.begin(); auto 
pred = [=](auto &&v) { return v == value; }; + EXPECT_EQ(xp::count_if(ops.dist_vec, pred), 4); EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), xp::count_if(ops.dist_vec, pred)); } TYPED_TEST(Count, FirstElemsIf) { - Ops1 ops(20); + std::vector vec(20); + std::iota(vec.begin(), vec.end(), 0); + + Ops1 ops(vec.size()); + ops.vec = vec; + xp::copy(ops.vec, ops.dist_vec.begin()); + auto pred = [=](auto &&v) { return v < 5; }; + EXPECT_EQ(xp::count_if(ops.dist_vec, pred), 5); EXPECT_EQ(std::count_if(ops.vec.begin(), ops.vec.end(), pred), xp::count_if(ops.dist_vec, pred)); } From e98de3bc478b902909ed07e05e1631c24d030eb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 16 Aug 2024 13:14:33 +0200 Subject: [PATCH 006/101] removed redundant conditional --- include/dr/mp/algorithms/count.hpp | 50 ++++++++++++------------------ 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/include/dr/mp/algorithms/count.hpp b/include/dr/mp/algorithms/count.hpp index a66c7196a7..78e33557db 100644 --- a/include/dr/mp/algorithms/count.hpp +++ b/include/dr/mp/algorithms/count.hpp @@ -39,39 +39,29 @@ auto count_if(std::size_t root, bool root_provided, DR &&dr, auto &&pred) { return count_type{}; } - if (aligned(dr)) { - dr::drlog.debug("Parallel count\n"); - - // Count within the local segments - auto count = [=](auto &&r) { - assert(rng::size(r) > 0); - return count_if_local(r, pred); - }; - auto locals = rng::views::transform(local_segments(dr), count); - auto local = add_counts(locals); - - std::vector all(comm.size()); - if (root_provided) { - // Everyone gathers to root, only root adds up the counts - comm.gather(local, std::span{all}, root); - if (root == comm.rank()) { - return add_counts(all); - } else { - return count_type{}; - } - } else { - // Everyone gathers and everyone adds up the counts - comm.all_gather(local, all); + dr::drlog.debug("Parallel count\n"); + + // Count within the local segments + auto count = [=](auto 
&&r) { + assert(rng::size(r) > 0); + return count_if_local(r, pred); + }; + auto locals = rng::views::transform(local_segments(dr), count); + auto local = add_counts(locals); + + std::vector all(comm.size()); + if (root_provided) { + // Everyone gathers to root, only root adds up the counts + comm.gather(local, std::span{all}, root); + if (root == comm.rank()) { return add_counts(all); + } else { + return count_type{}; } } else { - dr::drlog.debug("Serial count\n"); - count_type result{}; - if (!root_provided || root == comm.rank()) { - result = add_counts(dr); - } - mp::barrier(); - return result; + // Everyone gathers and everyone adds up the counts + comm.all_gather(local, all); + return add_counts(all); } } From f31b80ce104d992508db41430e5fdcfc7f0713ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sun, 25 Aug 2024 17:07:16 +0200 Subject: [PATCH 007/101] fixes according to pre-commit checks --- include/dr/mp/algorithms/count.hpp | 6 +++--- test/gtest/common/count.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/dr/mp/algorithms/count.hpp b/include/dr/mp/algorithms/count.hpp index 78e33557db..3616cded70 100644 --- a/include/dr/mp/algorithms/count.hpp +++ b/include/dr/mp/algorithms/count.hpp @@ -71,13 +71,13 @@ namespace dr::mp { class count_fn_ { public: - template + template auto operator()(std::size_t root, DR &&dr, const T &value) const { auto pred = [=](auto &&v) { return v == value; }; return __detail::count_if(root, true, dr, pred); } - template + template auto operator()(DR &&dr, const T &value) const { auto pred = [=](auto &&v) { return v == value; }; return __detail::count_if(0, false, dr, pred); @@ -105,7 +105,7 @@ class count_if_fn_ { return __detail::count_if(root, true, dr, pred); } - template + template auto operator()(DR &&dr, auto &&pred) const { return __detail::count_if(0, false, dr, pred); } diff --git a/test/gtest/common/count.cpp b/test/gtest/common/count.cpp index 
0fcb3b2d92..f6442055fd 100644 --- a/test/gtest/common/count.cpp +++ b/test/gtest/common/count.cpp @@ -24,7 +24,7 @@ TYPED_TEST(Count, EmptyIf) { } TYPED_TEST(Count, BasicFirstElem) { - std::vector vec { 1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7 }; + std::vector vec{1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7}; Ops1 ops(vec.size()); ops.vec = vec; @@ -38,7 +38,7 @@ TYPED_TEST(Count, BasicFirstElem) { } TYPED_TEST(Count, BasicFirstElemIf) { - std::vector vec { 1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7 }; + std::vector vec{1, 2, 3, 1, 1, 3, 4, 1, 5, 6, 7}; Ops1 ops(vec.size()); ops.vec = vec; From 5511751dd91b8567b980031e1a61dd122276dc47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 11 Nov 2024 16:19:18 +0100 Subject: [PATCH 008/101] added cyclic_halo_impl and distributed_vector_dual --- .../dr/mp/containers/distributed_vector.hpp | 1 + .../mp/containers/distributed_vector_dual.hpp | 299 ++++++++++++++++++ include/dr/mp/halo.hpp | 54 ++++ 3 files changed, 354 insertions(+) create mode 100644 include/dr/mp/containers/distributed_vector_dual.hpp diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 2611963064..89ced161b9 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Intel Corporation +// SPDX-FileCopyrightText: Intel Corporation // // SPDX-License-Identifier: BSD-3-Clause diff --git a/include/dr/mp/containers/distributed_vector_dual.hpp b/include/dr/mp/containers/distributed_vector_dual.hpp new file mode 100644 index 0000000000..c2b8b48e21 --- /dev/null +++ b/include/dr/mp/containers/distributed_vector_dual.hpp @@ -0,0 +1,299 @@ +// SPDX-FileCopyrightText: Intel Corporation +// SPDX-FileCopyrightText: Intel Corporation +// +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include +#include +#include +#include + +namespace dr::mp { + +class MpiBackend { + dr::rma_window win_; + 
+public: + void *allocate(std::size_t data_size) { + assert(data_size > 0); + void *data = __detail::allocator().allocate(data_size); + DRLOG("called MPI allocate({}) -> got:{}", data_size, data); + win_.create(default_comm(), data, data_size); + active_wins().insert(win_.mpi_win()); + return data; + } + + void deallocate(void *data, std::size_t data_size) { + assert(data_size > 0); + DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); + active_wins().erase(win_.mpi_win()); + win_.free(); + __detail::allocator().deallocate(static_cast(data), + data_size); + } + + void getmem(void *dst, std::size_t offset, std::size_t datalen, + int segment_index) { + DRLOG("calling MPI get(dst:{}, " + "segm_offset:{}, size:{}, peer:{})", + dst, offset, datalen, segment_index); + +#if (MPI_VERSION >= 4) || \ + (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) + // 64-bit API inside + win_.get(dst, datalen, segment_index, offset); +#else + for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { + std::size_t s = std::min(remainder, (std::size_t)INT_MAX); + DRLOG("{}:{} win_.get total {} now {} bytes at off {}, dst offset {}", + default_comm().rank(), __LINE__, datalen, s, off, offset + off); + win_.get((uint8_t *)dst + off, s, segment_index, offset + off); + off += s; + remainder -= s; + } +#endif + } + + void putmem(void const *src, std::size_t offset, std::size_t datalen, + int segment_index) { + DRLOG("calling MPI put(segm_offset:{}, " + "src:{}, size:{}, peer:{})", + offset, src, datalen, segment_index); + +#if (MPI_VERSION >= 4) || \ + (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) + // 64-bit API inside + win_.put(src, datalen, segment_index, offset); +#else + for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { + std::size_t s = std::min(remainder, (std::size_t)INT_MAX); + DRLOG("{}:{} win_.put {} bytes at off {}, dst offset {}", + default_comm().rank(), __LINE__, s, off, offset + off); + 
win_.put((uint8_t *)src + off, s, segment_index, offset + off); + off += s; + remainder -= s; + } +#endif + } + + std::size_t getrank() { return win_.communicator().rank(); } + + void fence() { win_.fence(); } +}; + +/// distributed vector +template +class distributed_vector_dual { + +public: + using value_type = T; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using backend_type = BackendT; + + class iterator { + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = typename distributed_vector::value_type; + using difference_type = typename distributed_vector::difference_type; + + iterator() {} + iterator(const distributed_vector *parent, difference_type offset) + : parent_(parent), offset_(offset) {} + + auto operator+(difference_type n) const { + return iterator(parent_, offset_ + n); + } + friend auto operator+(difference_type n, const iterator &other) { + return other + n; + } + auto operator-(difference_type n) const { + return iterator(parent_, offset_ - n); + } + auto operator-(iterator other) const { return offset_ - other.offset_; } + + auto &operator+=(difference_type n) { + offset_ += n; + return *this; + } + auto &operator-=(difference_type n) { + offset_ -= n; + return *this; + } + auto &operator++() { + offset_++; + return *this; + } + auto operator++(int) { + auto old = *this; + offset_++; + return old; + } + auto &operator--() { + offset_--; + return *this; + } + auto operator--(int) { + auto old = *this; + offset_--; + return old; + } + + bool operator==(iterator other) const { + if (parent_ == nullptr || other.parent_ == nullptr) { + return false; + } else { + return offset_ == other.offset_; + } + } + auto operator<=>(iterator other) const { + assert(parent_ == other.parent_); + return offset_ <=> other.offset_; + } + + auto operator*() const { + auto segment_size = parent_->segment_size_; + return parent_ + ->segments()[offset_ / segment_size][offset_ % segment_size]; + } + auto 
operator[](difference_type n) const { return *(*this + n); } + + auto local() { + auto segment_size = parent_->segment_size_; + return (parent_->segments()[offset_ / segment_size].begin() + + offset_ % segment_size) + .local(); + } + + // + // Support for distributed ranges + // + // distributed iterator provides segments + // remote iterator provides local + // + auto segments() { + return dr::__detail::drop_segments(parent_->segments(), offset_); + } + + private: + const distributed_vector *parent_ = nullptr; + difference_type offset_; + }; + + // Do not copy + // We need a move constructor for the implementation of reduce algorithm + distributed_vector_dual(const distributed_vector_dual &) = delete; + distributed_vector_dual &operator=(const distributed_vector_dual &) = delete; + distributed_vector_dual(distributed_vector_dual &&) { assert(false); } + + /// Constructor + distributed_vector_dual(std::size_t size = 0, + distribution dist = distribution()) { + init(size, dist); + } + + /// Constructor + distributed_vector_dual(std::size_t size, value_type fill_value, + distribution dist = distribution()) { + init(size, dist); + mp::fill(*this, fill_value); + } + + ~distributed_vector_dual() { + if (finalized()) return; + + fence(); + + for (size_t i = 0; i < segments_per_proc; i++) { + if (datas_[i] != nullptr) { + backend.deallocate(data_, data_size_ * sizeof(value_type)); + } + + delete halos_[i]; + } + + delete halo_; + } + + /// Returns iterator to beginning= + auto begin() const { return iterator(this, 0); } + /// Returns iterator to end + auto end() const { return begin() + size_; } + + /// Returns size + auto size() const { return size_; } + /// Returns reference using index + auto operator[](difference_type n) const { return *(begin() + n); } + + auto &halo() const { return *halo_; } + + auto segments() const { return rng::views::all(segments_); } + + void fence() { backend.fence(); } + +private: + void init(auto size, auto dist) { + size_ = size; + 
distribution_ = dist; + + // determine the distribution of data + auto comm_size = default_comm().size(); // dr-style ignore + auto hb = dist.halo(); + std::size_t gran = dist.granularity(); + // TODO: make this an error that is reported back to user + assert(size % gran == 0 && "size must be a multiple of the granularity"); + assert(hb.prev % gran == 0 && "size must be a multiple of the granularity"); + assert(hb.next % gran == 0 && "size must be a multiple of the granularity"); + + auto proc_segments_size = gran * std::max({ + (size / gran + comm_size - 1) / comm_size, + hb.prev / gran, + hb.next / gran}); + segment_size_ = proc_segments_size / segments_per_proc; + + data_size_ = segment_size_ + hb.prev + hb.next; + + for (std::size_t i = 0; i < segments_per_proc; i++) { + if (size_ > 0) { + datas_[i] = static_cast( backend.allocate(data_size_ * sizeof(T))); + } + + halos_[i] = new span_halo(default_comm(), datas_[i], data_size_, hb); + } + + halo_ = new cyclic_span_halo(halos_); + + std::size_t segment_index = 0; + for (std::size_t i = 0; i < size; i += segment_size_) { + segments_.emplace_back(this, segment_index++, + std::min(segment_size_, size - i), data_size_); + } + + fence(); + } + + friend dv_segment_iterator; + + static constexpr std::size_t segments_per_proc = 2; + + std::size_t segment_size_ = 0; + std::size_t data_size_ = 0; // size + halo + + std::array *, segments_per_proc> halos_; + std::array datas_; + cyclic_span_halo *halo_; + + distribution distribution_; + std::size_t size_; + std::vector> segments_; + BackendT backend; +}; + +template +auto &halo(const distributed_vector_dual &dv) { + return dv.halo(); +} + +} // namespace dr::mp diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 7f7b7dbdb1..45face674f 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -418,6 +418,60 @@ class span_halo : public span_halo_impl { } }; +template +class cyclic_span_halo { +public: + using group_type = span_group; + using 
halo_type = span_halo; + + cyclic_span_halo(const std::vector& halos) + : halos_(halos) { + for (const auto& halo : halos_) { + assert(halo != nullptr); + } + } + + void exchange_begin() { + halos_[next_comm_index_]->exchange_begin(); + } + + void exchange_finalize() { + halos_[next_comm_index_]->exchange_finalize(); + //increment_index(); + } + + void exchange() { + halos_[next_comm_index_]->exchange(); + //increment_index(); + } + + void reduce_begin() { + halos_[next_comm_index_]->reduce_begin(); + } + + void reduce_finalize(const auto &op) { + halos_[next_comm_index_]->reduce_finalize(op); + //increment_index(); + } + + void reduce_finalize() { + halos_[next_comm_index_]->reduce_finalize(); + //increment_index(); + } + + void swap() { + increment_index(); + } + +private: + void increment_index() { + next_comm_index_ = (next_comm_index_ + 1) % halos_.size(); + } + + std::vector halos_; + std::size_t next_comm_index_ = 0; +} + } // namespace dr::mp #ifdef DR_FORMAT From e1e99106cffb1fecf892f799f7a8e618fa374a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 2 Dec 2024 23:45:04 +0100 Subject: [PATCH 009/101] added dual_segment and refined dual_distributed_vector --- include/dr/mp/algorithms/for_each.hpp | 43 +++++- .../dr/mp/containers/distributed_vector.hpp | 1 - ...r_dual.hpp => dual_distributed_vector.hpp} | 126 +++++------------- include/dr/mp/containers/dual_segment.hpp | 31 +++++ include/dr/mp/halo.hpp | 4 +- 5 files changed, 111 insertions(+), 94 deletions(-) rename include/dr/mp/containers/{distributed_vector_dual.hpp => dual_distributed_vector.hpp} (58%) create mode 100644 include/dr/mp/containers/dual_segment.hpp diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 8851208198..80c1cbeb43 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -18,7 +18,48 @@ namespace dr::mp { -/// Collective for_each on distributed range +template 
+concept dual_vector_range = + dr::distributed_range && requires(R &r) { local_segments(r)[0].is_compute(); }; + +void for_each(dual_vector_range auto &&dr, auto op) { + assert(false); + + dr::drlog.debug(dr::logger::for_each, "for_each: parallel execution\n"); + if (rng::empty(dr)) { + return; + } + assert(aligned(dr)); + + for (auto &s : local_segments(dr)) { + if (!s.is_compute()) { + s.swap_state(); + continue; + } + + if (mp::use_sycl()) { + dr::drlog.debug(" using sycl\n"); + + assert(rng::distance(s) > 0); +#ifdef SYCL_LANGUAGE_VERSION + dr::__detail::parallel_for( + dr::mp::sycl_queue(), sycl::range<1>(rng::distance(s)), + [first = rng::begin(s), op](auto idx) { op(first[idx]); }) + .wait(); +#else + assert(false); +#endif + } else { + dr::drlog.debug(" using cpu\n"); + rng::for_each(s, op); + } + + s.swap_state(); + } + barrier(); +} + +// Collective for_each on distributed range void for_each(dr::distributed_range auto &&dr, auto op) { dr::drlog.debug(dr::logger::for_each, "for_each: parallel execution\n"); if (rng::empty(dr)) { diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 89ced161b9..2611963064 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Intel Corporation -// SPDX-FileCopyrightText: Intel Corporation // // SPDX-License-Identifier: BSD-3-Clause diff --git a/include/dr/mp/containers/distributed_vector_dual.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp similarity index 58% rename from include/dr/mp/containers/distributed_vector_dual.hpp rename to include/dr/mp/containers/dual_distributed_vector.hpp index c2b8b48e21..88b799644e 100644 --- a/include/dr/mp/containers/distributed_vector_dual.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Intel Corporation -// SPDX-FileCopyrightText: Intel Corporation // // 
SPDX-License-Identifier: BSD-3-Clause @@ -8,84 +7,13 @@ #include #include #include -#include +#include namespace dr::mp { -class MpiBackend { - dr::rma_window win_; - -public: - void *allocate(std::size_t data_size) { - assert(data_size > 0); - void *data = __detail::allocator().allocate(data_size); - DRLOG("called MPI allocate({}) -> got:{}", data_size, data); - win_.create(default_comm(), data, data_size); - active_wins().insert(win_.mpi_win()); - return data; - } - - void deallocate(void *data, std::size_t data_size) { - assert(data_size > 0); - DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); - active_wins().erase(win_.mpi_win()); - win_.free(); - __detail::allocator().deallocate(static_cast(data), - data_size); - } - - void getmem(void *dst, std::size_t offset, std::size_t datalen, - int segment_index) { - DRLOG("calling MPI get(dst:{}, " - "segm_offset:{}, size:{}, peer:{})", - dst, offset, datalen, segment_index); - -#if (MPI_VERSION >= 4) || \ - (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) - // 64-bit API inside - win_.get(dst, datalen, segment_index, offset); -#else - for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { - std::size_t s = std::min(remainder, (std::size_t)INT_MAX); - DRLOG("{}:{} win_.get total {} now {} bytes at off {}, dst offset {}", - default_comm().rank(), __LINE__, datalen, s, off, offset + off); - win_.get((uint8_t *)dst + off, s, segment_index, offset + off); - off += s; - remainder -= s; - } -#endif - } - - void putmem(void const *src, std::size_t offset, std::size_t datalen, - int segment_index) { - DRLOG("calling MPI put(segm_offset:{}, " - "src:{}, size:{}, peer:{})", - offset, src, datalen, segment_index); - -#if (MPI_VERSION >= 4) || \ - (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) - // 64-bit API inside - win_.put(src, datalen, segment_index, offset); -#else - for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { - std::size_t s = 
std::min(remainder, (std::size_t)INT_MAX); - DRLOG("{}:{} win_.put {} bytes at off {}, dst offset {}", - default_comm().rank(), __LINE__, s, off, offset + off); - win_.put((uint8_t *)src + off, s, segment_index, offset + off); - off += s; - remainder -= s; - } -#endif - } - - std::size_t getrank() { return win_.communicator().rank(); } - - void fence() { win_.fence(); } -}; - /// distributed vector template -class distributed_vector_dual { +class dual_distributed_vector { public: using value_type = T; @@ -96,11 +24,11 @@ class distributed_vector_dual { class iterator { public: using iterator_category = std::random_access_iterator_tag; - using value_type = typename distributed_vector::value_type; - using difference_type = typename distributed_vector::difference_type; + using value_type = typename dual_distributed_vector::value_type; + using difference_type = typename dual_distributed_vector::difference_type; iterator() {} - iterator(const distributed_vector *parent, difference_type offset) + iterator(const dual_distributed_vector *parent, difference_type offset) : parent_(parent), offset_(offset) {} auto operator+(difference_type n) const { @@ -178,37 +106,37 @@ class distributed_vector_dual { } private: - const distributed_vector *parent_ = nullptr; + const dual_distributed_vector *parent_ = nullptr; difference_type offset_; }; // Do not copy // We need a move constructor for the implementation of reduce algorithm - distributed_vector_dual(const distributed_vector_dual &) = delete; - distributed_vector_dual &operator=(const distributed_vector_dual &) = delete; - distributed_vector_dual(distributed_vector_dual &&) { assert(false); } + dual_distributed_vector(const dual_distributed_vector &) = delete; + dual_distributed_vector &operator=(const dual_distributed_vector &) = delete; + dual_distributed_vector(dual_distributed_vector &&) { assert(false); } /// Constructor - distributed_vector_dual(std::size_t size = 0, + dual_distributed_vector(std::size_t size = 0, 
distribution dist = distribution()) { init(size, dist); } /// Constructor - distributed_vector_dual(std::size_t size, value_type fill_value, + dual_distributed_vector(std::size_t size, value_type fill_value, distribution dist = distribution()) { init(size, dist); mp::fill(*this, fill_value); } - ~distributed_vector_dual() { + ~dual_distributed_vector() { if (finalized()) return; fence(); for (size_t i = 0; i < segments_per_proc; i++) { if (datas_[i] != nullptr) { - backend.deallocate(data_, data_size_ * sizeof(value_type)); + backend.deallocate(datas_[i], data_size_ * sizeof(value_type)); } delete halos_[i]; @@ -257,7 +185,7 @@ class distributed_vector_dual { for (std::size_t i = 0; i < segments_per_proc; i++) { if (size_ > 0) { - datas_[i] = static_cast( backend.allocate(data_size_ * sizeof(T))); + datas_[i] = static_cast(backend.allocate(data_size_ * sizeof(T))); } halos_[i] = new span_halo(default_comm(), datas_[i], data_size_, hb); @@ -266,15 +194,33 @@ class distributed_vector_dual { halo_ = new cyclic_span_halo(halos_); std::size_t segment_index = 0; + bool first_half = true; for (std::size_t i = 0; i < size; i += segment_size_) { - segments_.emplace_back(this, segment_index++, + segments_.emplace_back(this, segment_index, std::min(segment_size_, size - i), data_size_); + + if (first_half) { + if (segment_index < comm_size - 1) { + segment_index++; + } else { + first_half = false; + } + } else { + segment_index--; + } + } + + for (auto& s: segments) { + if (s.is_local()) { + s.swap_state(); + break; + } } fence(); } - friend dv_segment_iterator; + friend dv_segment_iterator; static constexpr std::size_t segments_per_proc = 2; @@ -287,12 +233,12 @@ class distributed_vector_dual { distribution distribution_; std::size_t size_; - std::vector> segments_; + std::vector> segments_; BackendT backend; }; template -auto &halo(const distributed_vector_dual &dv) { +auto &halo(const dual_distributed_vector &dv) { return dv.halo(); } diff --git 
a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp new file mode 100644 index 0000000000..645d206fe7 --- /dev/null +++ b/include/dr/mp/containers/dual_segment.hpp @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: Intel Corporation +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "segment.hpp" + +#pragma once + +namespace dr::mp { + +template class dual_dv_segment : dv_segment { +private: + using iterator = dv_segment_iterator; + +public: + using difference_type = std::ptrdiff_t; + dual_dv_segment() = default; + dual_dv_segment(DV *dv, std::size_t segment_index, std::size_t size, + std::size_t reserved) + : dv_segment(dv, segment_index, size, reserved) { + } + + bool is_compute() const { return _is_compute; } + + void swap_state() { _is_compute = !_is_compute; } + +private: + bool _is_compute = true; +}; // dual_dv_segment + +} // namespace dr::mp diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 45face674f..87681b1d5b 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -418,7 +418,7 @@ class span_halo : public span_halo_impl { } }; -template +template > class cyclic_span_halo { public: using group_type = span_group; @@ -470,7 +470,7 @@ class cyclic_span_halo { std::vector halos_; std::size_t next_comm_index_ = 0; -} +}; } // namespace dr::mp From bdecda7a33326eb8560b80f04121f8eafad1322c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 18:46:19 +0100 Subject: [PATCH 010/101] progress --- include/dr/mp.hpp | 1 + .../mp/containers/dual_distributed_vector.hpp | 12 +- include/dr/mp/containers/dual_segment.hpp | 4 +- test/gtest/mp/CMakeLists.txt | 3 +- test/gtest/mp/dual_distributed_vector.cpp | 115 ++++++++++++++++++ 5 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 test/gtest/mp/dual_distributed_vector.cpp diff --git a/include/dr/mp.hpp b/include/dr/mp.hpp index f9598bbcd8..06f4fb3af9 100644 --- a/include/dr/mp.hpp +++ 
b/include/dr/mp.hpp @@ -79,3 +79,4 @@ #include #include #include +#include diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 88b799644e..6bb20e11f7 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -185,10 +185,10 @@ class dual_distributed_vector { for (std::size_t i = 0; i < segments_per_proc; i++) { if (size_ > 0) { - datas_[i] = static_cast(backend.allocate(data_size_ * sizeof(T))); + datas_.push_back(static_cast(backend.allocate(data_size_ * sizeof(T)))); } - halos_[i] = new span_halo(default_comm(), datas_[i], data_size_, hb); + halos_.push_back(new span_halo(default_comm(), datas_[i], data_size_, hb)); } halo_ = new cyclic_span_halo(halos_); @@ -210,7 +210,7 @@ class dual_distributed_vector { } } - for (auto& s: segments) { + for (auto& s: segments_) { if (s.is_local()) { s.swap_state(); break; @@ -227,13 +227,13 @@ class dual_distributed_vector { std::size_t segment_size_ = 0; std::size_t data_size_ = 0; // size + halo - std::array *, segments_per_proc> halos_; - std::array datas_; + std::vector *> halos_; + std::vector datas_; cyclic_span_halo *halo_; distribution distribution_; std::size_t size_; - std::vector> segments_; + std::vector> segments_; BackendT backend; }; diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index 645d206fe7..7cab8f8c36 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -8,7 +8,7 @@ namespace dr::mp { -template class dual_dv_segment : dv_segment { +template class dual_dv_segment : public dv_segment { private: using iterator = dv_segment_iterator; @@ -17,7 +17,7 @@ template class dual_dv_segment : dv_segment { dual_dv_segment() = default; dual_dv_segment(DV *dv, std::size_t segment_index, std::size_t size, std::size_t reserved) - : dv_segment(dv, segment_index, size, reserved) 
{ + : dv_segment(dv, segment_index, size, reserved) { } bool is_compute() const { return _is_compute; } diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index 32f26d120a..7ff7cf9536 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -36,6 +36,7 @@ add_executable( communicator.cpp copy.cpp distributed_vector.cpp + dual_distributed_vector.cpp halo.cpp mdstar.cpp mpsort.cpp @@ -58,7 +59,7 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - ../common/count.cpp + dual_distributed_vector.cpp ) # cmake-format: on diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp new file mode 100644 index 0000000000..4ff1028382 --- /dev/null +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -0,0 +1,115 @@ +// SPDX-FileCopyrightText: Intel Corporation +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "xp-tests.hpp" + +using T = int; +using DV = dr::mp::dual_distributed_vector; +using DVI = typename DV::iterator; + +TEST(MpTests, DualDistributedVectorQuery) { + const int n = 10; + DV a(n); + + EXPECT_EQ(a.size(), n); +} + +TEST(MpTests, DualDistributedVectorIndex) { + const std::size_t n = 10; + DV dv(n); + + if (comm_rank == 0) { + for (std::size_t i = 0; i < n; i++) { + dv[i] = i + 10; + } + } + dr::mp::fence(); + + for (std::size_t i = 0; i < n; i++) { + EXPECT_EQ(dv[i], i + 10); + } + + DV dv2(n); + + if (comm_rank == 0) { + dv2[3] = 1000; + dv2[3] = dv[3]; + } + dr::mp::fence(); + EXPECT_EQ(dv2[3], dv[3]); +} + +TEST(MpTests, DualDistributedVectorAlgorithms) { + const std::size_t n = 10; + const int root = 0; + DV dv(n); + + if (comm_rank == root) { + std::vector ref(n); + std::iota(ref.begin(), ref.end(), 1); + + std::iota(dv.begin(), dv.end(), 1); + + EXPECT_TRUE(equal_gtest(dv, ref)); + + std::iota(ref.begin(), ref.end(), 11); + std::copy(ref.begin(), ref.end(), dv.begin()); + EXPECT_TRUE(equal_gtest(dv, ref)); + + std::iota(ref.begin(), ref.end(), 21); + 
rng::copy(ref, dv.begin()); + EXPECT_TRUE(equal_gtest(dv, ref)); + + std::iota(dv.begin(), dv.end(), 31); + rng::copy(dv, ref.begin()); + EXPECT_TRUE(equal_gtest(dv, ref)); + } +} + +int a; + +// Operations on a const distributed_vector +void common_operations(auto &dv) { + a = dv[1]; + EXPECT_EQ(dv[1], 101); + EXPECT_EQ(*(&(dv[1])), 101); + + auto p = &dv[1]; + EXPECT_EQ(*(p + 1), 102); +} + +TEST(MpTests, DualDistributedVectorReference) { + std::size_t n = 10; + DV dv(n); + if (comm_rank == 0) { + rng::iota(dv, 100); + } + dr::mp::fence(); + + const DV &cdv = dv; + if (comm_rank == 0) { + common_operations(cdv); + common_operations(dv); + } + MPI_Barrier(comm); + + if (comm_rank == 0) { + dv[2] = 2; + } + dr::mp::fence(); + EXPECT_EQ(dv[2], 2); +} + +TEST(MpTests, DualDistributedVectorGranularity) { + std::size_t gran = 3; + std::size_t n = gran * 6; + auto dist = dr::mp::distribution().granularity(gran); + DV dv(n, dist); + + std::size_t previous_size = gran; + for (auto &segment : dr::ranges::segments(dv)) { + EXPECT_EQ(previous_size % gran, 0); + previous_size = segment.size(); + } +} From 811307c30dd7578fc438dd16454e1479f89201b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 19:46:01 +0100 Subject: [PATCH 011/101] tiny fix --- include/dr/mp/containers/dual_distributed_vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 6bb20e11f7..6638d47074 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include #include From 53536892e60151016686c58b93587ee1c91cdd85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 19:54:46 +0100 Subject: [PATCH 012/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 
8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 6638d47074..6ead1cf082 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -237,9 +237,9 @@ class dual_distributed_vector { BackendT backend; }; -template -auto &halo(const dual_distributed_vector &dv) { - return dv.halo(); -} +// template +// auto &halo(const dual_distributed_vector &dv) { +// return dv.halo(); +// } } // namespace dr::mp From 6701c41c39810e3d99336ffe2e5b364268b9f26b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 19:58:15 +0100 Subject: [PATCH 013/101] prog --- test/gtest/mp/dual_distributed_vector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp index 4ff1028382..7930eb882a 100644 --- a/test/gtest/mp/dual_distributed_vector.cpp +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -67,11 +67,11 @@ TEST(MpTests, DualDistributedVectorAlgorithms) { } } -int a; +int aa; // Operations on a const distributed_vector void common_operations(auto &dv) { - a = dv[1]; + aa = dv[1]; EXPECT_EQ(dv[1], 101); EXPECT_EQ(*(&(dv[1])), 101); From f79fe45360bbdbfa85a1b1e1c438e29faff62f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 20:00:59 +0100 Subject: [PATCH 014/101] prog --- test/gtest/mp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index 7ff7cf9536..4da1e11b6d 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -59,7 +59,7 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - dual_distributed_vector.cpp + count.cpp ) # cmake-format: on From 
b126f8b779f0aae19e9dfecbfcb4b11df771070f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 20:01:47 +0100 Subject: [PATCH 015/101] prog --- test/gtest/mp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index 4da1e11b6d..20e2761b0d 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -59,7 +59,7 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - count.cpp + ../common/count.cpp ) # cmake-format: on From e1d50c9f9fbf91f7034051649547ef402aaf8704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 20:09:33 +0100 Subject: [PATCH 016/101] prog --- test/gtest/mp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index 20e2761b0d..d7dcf21eac 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -36,7 +36,7 @@ add_executable( communicator.cpp copy.cpp distributed_vector.cpp - dual_distributed_vector.cpp + # dual_distributed_vector.cpp halo.cpp mdstar.cpp mpsort.cpp From 0da1f2ce35f97cb8bea6a33e300c34ddbfd9a2c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 25 Dec 2024 22:56:05 +0100 Subject: [PATCH 017/101] prog --- test/gtest/mp/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index d7dcf21eac..7ff7cf9536 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -36,7 +36,7 @@ add_executable( communicator.cpp copy.cpp distributed_vector.cpp - # dual_distributed_vector.cpp + dual_distributed_vector.cpp halo.cpp mdstar.cpp mpsort.cpp @@ -59,7 +59,7 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - ../common/count.cpp + dual_distributed_vector.cpp ) # cmake-format: on From 
871bd58d0f79891682612ff4f4eb44cdc6bb27f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 15:30:41 +0100 Subject: [PATCH 018/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 6ead1cf082..e7d23b8cf8 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -119,29 +119,38 @@ class dual_distributed_vector { /// Constructor dual_distributed_vector(std::size_t size = 0, distribution dist = distribution()) { + std::cout << "dual_distributed_vector()\n"; init(size, dist); } /// Constructor dual_distributed_vector(std::size_t size, value_type fill_value, distribution dist = distribution()) { + std::cout << "dual_distributed_vector(fill)\n"; init(size, dist); mp::fill(*this, fill_value); } ~dual_distributed_vector() { + std::cout << "~dual_distributed_vector()\n"; if (finalized()) return; + std::cout << "~: before fence\n"; fence(); + std::cout << "~: after fence\n"; for (size_t i = 0; i < segments_per_proc; i++) { + std::cout << "~: loop " << i << "\n"; if (datas_[i] != nullptr) { + std::cout << "~: deallocating\n"; backend.deallocate(datas_[i], data_size_ * sizeof(value_type)); } + std::cout << "~: deleting i-th halo\n"; delete halos_[i]; } + std::cout << "~: deleting superhalo\n"; delete halo_; } @@ -185,7 +194,8 @@ class dual_distributed_vector { for (std::size_t i = 0; i < segments_per_proc; i++) { if (size_ > 0) { - datas_.push_back(static_cast(backend.allocate(data_size_ * sizeof(T)))); + datas_.push_back(static_cast( + backend.allocate(data_size_ * sizeof(value_type)))); } halos_.push_back(new span_halo(default_comm(), datas_[i], data_size_, hb)); From 2e3c96dc987d5768a1b1783eb6042f7e2c55a953 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 15:38:32 +0100 Subject: [PATCH 019/101] prog --- .../mp/containers/dual_distributed_vector.hpp | 85 ++++++++++++++++++- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index e7d23b8cf8..e7af03ca15 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -5,12 +5,86 @@ #pragma once #include -#include #include #include namespace dr::mp { +class MpiBackend { + dr::rma_window win_; + +public: + void *allocate(std::size_t data_size) { + assert(data_size > 0); + void *data = __detail::allocator().allocate(data_size); + DRLOG("called MPI allocate({}) -> got:{}", data_size, data); + win_.create(default_comm(), data, data_size); + active_wins().insert(win_.mpi_win()); + return data; + } + + void deallocate(void *data, std::size_t data_size) { + assert(data_size > 0); + DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); + active_wins().erase(win_.mpi_win()); + __detail::allocator().deallocate(static_cast(data), + data_size); + } + + void free() { + DRLOG("calling MPI win free"); + win_.free(); + } + + void getmem(void *dst, std::size_t offset, std::size_t datalen, + int segment_index) { + DRLOG("calling MPI get(dst:{}, " + "segm_offset:{}, size:{}, peer:{})", + dst, offset, datalen, segment_index); + +#if (MPI_VERSION >= 4) || \ + (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) + // 64-bit API inside + win_.get(dst, datalen, segment_index, offset); +#else + for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { + std::size_t s = std::min(remainder, (std::size_t)INT_MAX); + DRLOG("{}:{} win_.get total {} now {} bytes at off {}, dst offset {}", + default_comm().rank(), __LINE__, datalen, s, off, offset + off); + win_.get((uint8_t *)dst + off, s, segment_index, offset + off); + off 
+= s; + remainder -= s; + } +#endif + } + + void putmem(void const *src, std::size_t offset, std::size_t datalen, + int segment_index) { + DRLOG("calling MPI put(segm_offset:{}, " + "src:{}, size:{}, peer:{})", + offset, src, datalen, segment_index); + +#if (MPI_VERSION >= 4) || \ + (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) + // 64-bit API inside + win_.put(src, datalen, segment_index, offset); +#else + for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { + std::size_t s = std::min(remainder, (std::size_t)INT_MAX); + DRLOG("{}:{} win_.put {} bytes at off {}, dst offset {}", + default_comm().rank(), __LINE__, s, off, offset + off); + win_.put((uint8_t *)src + off, s, segment_index, offset + off); + off += s; + remainder -= s; + } +#endif + } + + std::size_t getrank() { return win_.communicator().rank(); } + + void fence() { win_.fence(); } +}; + /// distributed vector template class dual_distributed_vector { @@ -142,15 +216,18 @@ class dual_distributed_vector { for (size_t i = 0; i < segments_per_proc; i++) { std::cout << "~: loop " << i << "\n"; if (datas_[i] != nullptr) { - std::cout << "~: deallocating\n"; + std::cout << "~: backend.deallocate()\n"; backend.deallocate(datas_[i], data_size_ * sizeof(value_type)); } - std::cout << "~: deleting i-th halo\n"; + std::cout << "~: delete halos_[i]\n"; delete halos_[i]; } + + std::cout << "~: backend.free()\n"; + backend.free(); - std::cout << "~: deleting superhalo\n"; + std::cout << "~: delete halo_\n"; delete halo_; } From dff502adf60634696d905c09ce67fe5fcf9c7bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 15:39:56 +0100 Subject: [PATCH 020/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index e7af03ca15..3c2560c45b 100644 --- 
a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -10,7 +10,7 @@ namespace dr::mp { -class MpiBackend { +class DualMpiBackend { dr::rma_window win_; public: @@ -86,7 +86,7 @@ class MpiBackend { }; /// distributed vector -template +template class dual_distributed_vector { public: From 89f9c18164f301c1acc71e4714572b4f63fca5fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 15:42:05 +0100 Subject: [PATCH 021/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 3c2560c45b..496620306b 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -206,28 +206,19 @@ class dual_distributed_vector { } ~dual_distributed_vector() { - std::cout << "~dual_distributed_vector()\n"; if (finalized()) return; - std::cout << "~: before fence\n"; fence(); - std::cout << "~: after fence\n"; for (size_t i = 0; i < segments_per_proc; i++) { - std::cout << "~: loop " << i << "\n"; if (datas_[i] != nullptr) { - std::cout << "~: backend.deallocate()\n"; backend.deallocate(datas_[i], data_size_ * sizeof(value_type)); } - std::cout << "~: delete halos_[i]\n"; delete halos_[i]; } - std::cout << "~: backend.free()\n"; backend.free(); - - std::cout << "~: delete halo_\n"; delete halo_; } From 2e4cc88cd230575278c5e9873a79422907fdb71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 15:48:58 +0100 Subject: [PATCH 022/101] prog --- .../mp/containers/dual_distributed_vector.hpp | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 496620306b..6b28c561ab 
100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -27,15 +27,11 @@ class DualMpiBackend { assert(data_size > 0); DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); active_wins().erase(win_.mpi_win()); + win_.free(); __detail::allocator().deallocate(static_cast(data), data_size); } - void free() { - DRLOG("calling MPI win free"); - win_.free(); - } - void getmem(void *dst, std::size_t offset, std::size_t datalen, int segment_index) { DRLOG("calling MPI get(dst:{}, " @@ -193,14 +189,12 @@ class dual_distributed_vector { /// Constructor dual_distributed_vector(std::size_t size = 0, distribution dist = distribution()) { - std::cout << "dual_distributed_vector()\n"; init(size, dist); } /// Constructor dual_distributed_vector(std::size_t size, value_type fill_value, distribution dist = distribution()) { - std::cout << "dual_distributed_vector(fill)\n"; init(size, dist); mp::fill(*this, fill_value); } @@ -208,17 +202,16 @@ class dual_distributed_vector { ~dual_distributed_vector() { if (finalized()) return; - fence(); - for (size_t i = 0; i < segments_per_proc; i++) { + fence(i); + if (datas_[i] != nullptr) { - backend.deallocate(datas_[i], data_size_ * sizeof(value_type)); + backends[i].deallocate(datas_[i], data_size_ * sizeof(value_type)); } delete halos_[i]; } - backend.free(); delete halo_; } @@ -236,7 +229,7 @@ class dual_distributed_vector { auto segments() const { return rng::views::all(segments_); } - void fence() { backend.fence(); } + void fence(const std::size_t i) { backends[i].fence(); } private: void init(auto size, auto dist) { @@ -263,7 +256,7 @@ class dual_distributed_vector { for (std::size_t i = 0; i < segments_per_proc; i++) { if (size_ > 0) { datas_.push_back(static_cast( - backend.allocate(data_size_ * sizeof(value_type)))); + backends[i].allocate(data_size_ * sizeof(value_type)))); } halos_.push_back(new span_halo(default_comm(), datas_[i], data_size_, 
hb)); @@ -295,7 +288,9 @@ class dual_distributed_vector { } } - fence(); + for (size_t i = 0; i < segments_per_proc; i++) { + fence(i); + } } friend dv_segment_iterator; @@ -312,7 +307,7 @@ class dual_distributed_vector { distribution distribution_; std::size_t size_; std::vector> segments_; - BackendT backend; + std::vector backends(segments_per_proc); }; // template From 9c37e12d7d4eb8393d3ac3bc191d6c3ff3b8e5c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 15:50:32 +0100 Subject: [PATCH 023/101] prog --- .../dr/mp/containers/dual_distributed_vector.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 6b28c561ab..e1a4bc715a 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -10,6 +10,8 @@ namespace dr::mp { +static constexpr std::size_t DUAL_SEGMENTS_PER_PROC = 2; + class DualMpiBackend { dr::rma_window win_; @@ -202,7 +204,7 @@ class dual_distributed_vector { ~dual_distributed_vector() { if (finalized()) return; - for (size_t i = 0; i < segments_per_proc; i++) { + for (size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { fence(i); if (datas_[i] != nullptr) { @@ -249,11 +251,11 @@ class dual_distributed_vector { (size / gran + comm_size - 1) / comm_size, hb.prev / gran, hb.next / gran}); - segment_size_ = proc_segments_size / segments_per_proc; + segment_size_ = proc_segments_size / DUAL_SEGMENTS_PER_PROC; data_size_ = segment_size_ + hb.prev + hb.next; - for (std::size_t i = 0; i < segments_per_proc; i++) { + for (std::size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { if (size_ > 0) { datas_.push_back(static_cast( backends[i].allocate(data_size_ * sizeof(value_type)))); @@ -288,15 +290,13 @@ class dual_distributed_vector { } } - for (size_t i = 0; i < segments_per_proc; i++) { + for (size_t i = 0; i < 
DUAL_SEGMENTS_PER_PROC; i++) { fence(i); } } friend dv_segment_iterator; - static constexpr std::size_t segments_per_proc = 2; - std::size_t segment_size_ = 0; std::size_t data_size_ = 0; // size + halo @@ -307,7 +307,7 @@ class dual_distributed_vector { distribution distribution_; std::size_t size_; std::vector> segments_; - std::vector backends(segments_per_proc); + std::vector backends(DUAL_SEGMENTS_PER_PROC); }; // template From 78cbd296c79c86dc2844653cd48dd37ad69d5d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 15:51:13 +0100 Subject: [PATCH 024/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index e1a4bc715a..53019cc3ef 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -307,7 +307,7 @@ class dual_distributed_vector { distribution distribution_; std::size_t size_; std::vector> segments_; - std::vector backends(DUAL_SEGMENTS_PER_PROC); + std::vector backends{DUAL_SEGMENTS_PER_PROC}; }; // template From e18f59a79d1d47fcd17c653b019ebfd1f6186928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:11:46 +0100 Subject: [PATCH 025/101] prog --- include/dr/mp/containers/dual_segment.hpp | 9 ++++++++- include/dr/mp/containers/segment.hpp | 12 ++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index 7cab8f8c36..1c61e623a6 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -8,9 +8,16 @@ namespace dr::mp { +template class dual_dv_segment_iterator : public dv_segment_iterator { +protected: + DV::backend_type& backend() override { + return 
dv_->backends[0]; + } +}; + template class dual_dv_segment : public dv_segment { private: - using iterator = dv_segment_iterator; + using iterator = dual_dv_segment_iterator; public: using difference_type = std::ptrdiff_t; diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 56724ac61b..2a209384d2 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -127,7 +127,7 @@ template class dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - dv_->backend.getmem(dst, segment_offset * sizeof(value_type), + backend().getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -143,7 +143,7 @@ template class dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - dv_->backend.putmem(dst, segment_offset * sizeof(value_type), + backend().putmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -158,7 +158,7 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = dv_->backend.getrank(); + const auto my_process_segment_index = backend().getrank(); if (my_process_segment_index == segment_index_) return dv_->data_ + index_ + dv_->distribution_.halo().prev; @@ -203,7 +203,11 @@ template class dv_segment_iterator { return dv_->distribution_.halo(); } -private: +protected: + virtual DV::backend_type& backend() { + return dv_->backend; + } + // all fields need to be initialized by default ctor so every default // constructed iter is equal to any other default constructed iter DV *dv_ = nullptr; From 3329c9772a4d405b81685097b2322689074ae05a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= 
Date: Fri, 27 Dec 2024 16:15:22 +0100 Subject: [PATCH 026/101] prog --- include/dr/mp/containers/dual_segment.hpp | 11 ++++++----- include/dr/mp/containers/segment.hpp | 5 ++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index 1c61e623a6..b0b9bc7df4 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -8,14 +8,15 @@ namespace dr::mp { -template class dual_dv_segment_iterator : public dv_segment_iterator { +template +class dual_dv_segment_iterator : public dv_segment_iterator { protected: - DV::backend_type& backend() override { - return dv_->backends[0]; - } + virtual DV::backend_type& backend() { return dv_->backends[0]; } + virtual const DV::backend_type& backend() const { return dv_->backends[0]; } }; -template class dual_dv_segment : public dv_segment { +template +class dual_dv_segment : public dv_segment { private: using iterator = dual_dv_segment_iterator; diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 2a209384d2..b2bfd8c25e 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -204,9 +204,8 @@ template class dv_segment_iterator { } protected: - virtual DV::backend_type& backend() { - return dv_->backend; - } + virtual DV::backend_type& backend() { return dv_->backend; } + virtual const DV::backend_type& backend() const { return dv_->backend; } // all fields need to be initialized by default ctor so every default // constructed iter is equal to any other default constructed iter From a0e067d4348d0081c8cc5c5e2ca27fec935921ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:30:49 +0100 Subject: [PATCH 027/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 7 +++++++ include/dr/mp/containers/dual_distributed_vector.hpp | 7 +++++++ 
include/dr/mp/containers/dual_segment.hpp | 9 +-------- include/dr/mp/containers/segment.hpp | 6 +++--- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 2611963064..7850118494 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -276,6 +276,13 @@ template class distributed_vector { void fence() { backend.fence(); } + backend_type& backend(const std::size_t segment_index) { + return backend; + } + const backend_type& backend(const std::size_t segment_index) const { + return backend; + } + private: void init(auto size, auto dist) { size_ = size; diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 53019cc3ef..133aee1c2b 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -233,6 +233,13 @@ class dual_distributed_vector { void fence(const std::size_t i) { backends[i].fence(); } + backend_type& backend(const std::size_t segment_index) { + return backends[0]; + } + const backend_type& backend(const std::size_t segment_index) const { + return backends[0]; + } + private: void init(auto size, auto dist) { size_ = size; diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index b0b9bc7df4..d94b7ccf9b 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -8,17 +8,10 @@ namespace dr::mp { -template -class dual_dv_segment_iterator : public dv_segment_iterator { -protected: - virtual DV::backend_type& backend() { return dv_->backends[0]; } - virtual const DV::backend_type& backend() const { return dv_->backends[0]; } -}; - template class dual_dv_segment : public dv_segment { private: - using iterator = dual_dv_segment_iterator; + using iterator = 
dv_segment_iterator; public: using difference_type = std::ptrdiff_t; diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index b2bfd8c25e..5121603108 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -203,9 +203,9 @@ template class dv_segment_iterator { return dv_->distribution_.halo(); } -protected: - virtual DV::backend_type& backend() { return dv_->backend; } - virtual const DV::backend_type& backend() const { return dv_->backend; } +private: + virtual DV::backend_type& backend() { return dv_->backend(segment_index_); } + virtual const DV::backend_type& backend() const { return dv_->backend(segment_index_); } // all fields need to be initialized by default ctor so every default // constructed iter is equal to any other default constructed iter From fb05c7dcb44d978fee7bd8382d10fdfa67f3bac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:32:11 +0100 Subject: [PATCH 028/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 12 ++++++------ include/dr/mp/containers/dual_distributed_vector.hpp | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 7850118494..7bd06428f9 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -254,7 +254,7 @@ template class distributed_vector { if (!finalized()) { fence(); if (data_ != nullptr) { - backend.deallocate(data_, data_size_ * sizeof(value_type)); + backend_.deallocate(data_, data_size_ * sizeof(value_type)); } delete halo_; @@ -274,13 +274,13 @@ template class distributed_vector { auto segments() const { return rng::views::all(segments_); } - void fence() { backend.fence(); } + void fence() { backend_.fence(); } backend_type& backend(const std::size_t segment_index) { - return backend; + return 
backend_; } const backend_type& backend(const std::size_t segment_index) const { - return backend; + return backend_; } private: @@ -302,7 +302,7 @@ template class distributed_vector { data_size_ = segment_size_ + hb.prev + hb.next; if (size_ > 0) { - data_ = static_cast(backend.allocate(data_size_ * sizeof(T))); + data_ = static_cast(backend_.allocate(data_size_ * sizeof(T))); } halo_ = new span_halo(default_comm(), data_, data_size_, hb); @@ -326,7 +326,7 @@ template class distributed_vector { distribution distribution_; std::size_t size_; std::vector> segments_; - BackendT backend; + BackendT backend_; }; template diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 133aee1c2b..1f54a5f946 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -208,7 +208,7 @@ class dual_distributed_vector { fence(i); if (datas_[i] != nullptr) { - backends[i].deallocate(datas_[i], data_size_ * sizeof(value_type)); + backends_[i].deallocate(datas_[i], data_size_ * sizeof(value_type)); } delete halos_[i]; @@ -231,13 +231,13 @@ class dual_distributed_vector { auto segments() const { return rng::views::all(segments_); } - void fence(const std::size_t i) { backends[i].fence(); } + void fence(const std::size_t i) { backends_[i].fence(); } backend_type& backend(const std::size_t segment_index) { - return backends[0]; + return backends_[0]; } const backend_type& backend(const std::size_t segment_index) const { - return backends[0]; + return backends_[0]; } private: @@ -265,7 +265,7 @@ class dual_distributed_vector { for (std::size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { if (size_ > 0) { datas_.push_back(static_cast( - backends[i].allocate(data_size_ * sizeof(value_type)))); + backends_[i].allocate(data_size_ * sizeof(value_type)))); } halos_.push_back(new span_halo(default_comm(), datas_[i], data_size_, hb)); @@ -314,7 +314,7 @@ class 
dual_distributed_vector { distribution distribution_; std::size_t size_; std::vector> segments_; - std::vector backends{DUAL_SEGMENTS_PER_PROC}; + std::vector backends_{DUAL_SEGMENTS_PER_PROC}; }; // template From ea43af7dfc8ff4789e52fd737b8327da120cdfe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:33:45 +0100 Subject: [PATCH 029/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 4 ++-- include/dr/mp/containers/segment.hpp | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 7bd06428f9..84590ca3f5 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -277,10 +277,10 @@ template class distributed_vector { void fence() { backend_.fence(); } backend_type& backend(const std::size_t segment_index) { - return backend_; + return backend_; } const backend_type& backend(const std::size_t segment_index) const { - return backend_; + return backend_; } private: diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 5121603108..513113ad1d 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -127,7 +127,7 @@ template class dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - backend().getmem(dst, segment_offset * sizeof(value_type), + dv->backend().getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -143,7 +143,7 @@ template class dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - backend().putmem(dst, segment_offset * sizeof(value_type), + 
dv->backend().putmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -158,7 +158,7 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = backend().getrank(); + const auto my_process_segment_index = dv->backend().getrank(); if (my_process_segment_index == segment_index_) return dv_->data_ + index_ + dv_->distribution_.halo().prev; @@ -204,9 +204,6 @@ template class dv_segment_iterator { } private: - virtual DV::backend_type& backend() { return dv_->backend(segment_index_); } - virtual const DV::backend_type& backend() const { return dv_->backend(segment_index_); } - // all fields need to be initialized by default ctor so every default // constructed iter is equal to any other default constructed iter DV *dv_ = nullptr; From 4fd4d8f46de198bf401a68931b3c91498ada507b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:34:26 +0100 Subject: [PATCH 030/101] prog --- include/dr/mp/containers/segment.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 513113ad1d..ddbb8f71c9 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -127,7 +127,7 @@ template class dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - dv->backend().getmem(dst, segment_offset * sizeof(value_type), + dv_->backend().getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -143,7 +143,7 @@ template class dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - dv->backend().putmem(dst, segment_offset * 
sizeof(value_type), + dv_->backend().putmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -158,7 +158,7 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = dv->backend().getrank(); + const auto my_process_segment_index = dv_->backend().getrank(); if (my_process_segment_index == segment_index_) return dv_->data_ + index_ + dv_->distribution_.halo().prev; From 95ca6cfda925458dabd86ca424cd1b2ffd4334f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:35:04 +0100 Subject: [PATCH 031/101] prog --- include/dr/mp/containers/segment.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index ddbb8f71c9..6511c304a8 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -127,7 +127,7 @@ template class dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - dv_->backend().getmem(dst, segment_offset * sizeof(value_type), + dv_->backend(segment_index_).getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -143,7 +143,7 @@ template class dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - dv_->backend().putmem(dst, segment_offset * sizeof(value_type), + dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -158,7 +158,7 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = dv_->backend().getrank(); + const auto 
my_process_segment_index = dv_->backend(segment_index_).getrank(); if (my_process_segment_index == segment_index_) return dv_->data_ + index_ + dv_->distribution_.halo().prev; From 900613e34be606a73ca1675d452dc9258685e7c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:37:42 +0100 Subject: [PATCH 032/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 1f54a5f946..b2ea96167f 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -233,11 +233,13 @@ class dual_distributed_vector { void fence(const std::size_t i) { backends_[i].fence(); } - backend_type& backend(const std::size_t segment_index) { - return backends_[0]; + backend_type& backend(const std::size_t segment_index) { + auto comm_size = default_comm().size(); + return backends_[segment_index < comm_size ? 0 : 1]; } - const backend_type& backend(const std::size_t segment_index) const { - return backends_[0]; + const backend_type& backend(const std::size_t segment_index) const { + auto comm_size = default_comm().size(); + return backends_[segment_index < comm_size ? 
0 : 1]; } private: From 9218cd89ae300c3182dbd71aa43c39e3178bdfcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:40:48 +0100 Subject: [PATCH 033/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index b2ea96167f..e3e61d0293 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -234,10 +234,12 @@ class dual_distributed_vector { void fence(const std::size_t i) { backends_[i].fence(); } backend_type& backend(const std::size_t segment_index) { + std::cout << "[backend] segment_index: " << segment_index << "\n"; auto comm_size = default_comm().size(); return backends_[segment_index < comm_size ? 0 : 1]; } const backend_type& backend(const std::size_t segment_index) const { + std::cout << "[backend] segment_index: " << segment_index << "\n"; auto comm_size = default_comm().size(); return backends_[segment_index < comm_size ? 
0 : 1]; } From 46bfa65231bd53a19928e564b6c2cc5ba5c1f063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:41:46 +0100 Subject: [PATCH 034/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index e3e61d0293..66ccd667ab 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -234,13 +234,13 @@ class dual_distributed_vector { void fence(const std::size_t i) { backends_[i].fence(); } backend_type& backend(const std::size_t segment_index) { - std::cout << "[backend] segment_index: " << segment_index << "\n"; auto comm_size = default_comm().size(); + std::cout << "[backend] segment_index, comm_size: " << segment_index << ", " << comm_size << "\n"; return backends_[segment_index < comm_size ? 0 : 1]; } const backend_type& backend(const std::size_t segment_index) const { - std::cout << "[backend] segment_index: " << segment_index << "\n"; auto comm_size = default_comm().size(); + std::cout << "[backend] segment_index, comm_size: " << segment_index << ", " << comm_size << "\n"; return backends_[segment_index < comm_size ? 
0 : 1]; } From 3d7a9a80865a184ddbc70dacad34cc3df033291e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:45:33 +0100 Subject: [PATCH 035/101] prog --- .../mp/containers/dual_distributed_vector.hpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 66ccd667ab..4b541df4cf 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -280,18 +280,18 @@ class dual_distributed_vector { std::size_t segment_index = 0; bool first_half = true; for (std::size_t i = 0; i < size; i += segment_size_) { - segments_.emplace_back(this, segment_index, + segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); - if (first_half) { - if (segment_index < comm_size - 1) { - segment_index++; - } else { - first_half = false; - } - } else { - segment_index--; - } + // if (first_half) { + // if (segment_index < comm_size - 1) { + // segment_index++; + // } else { + // first_half = false; + // } + // } else { + // segment_index--; + // } } for (auto& s: segments_) { From 2603a6c621f773f5f2939c1d265466b3c7883b35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:46:02 +0100 Subject: [PATCH 036/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 4b541df4cf..541d9a5203 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -278,7 +278,7 @@ class dual_distributed_vector { halo_ = new cyclic_span_halo(halos_); std::size_t segment_index = 0; - bool first_half = true; + // bool first_half = true; 
for (std::size_t i = 0; i < size; i += segment_size_) { segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); From cdce40571dcfd008b4c7952e82e7b190c6de4f10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Fri, 27 Dec 2024 16:54:57 +0100 Subject: [PATCH 037/101] prog --- .../mp/containers/dual_distributed_vector.hpp | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 541d9a5203..80280c504d 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -36,20 +36,22 @@ class DualMpiBackend { void getmem(void *dst, std::size_t offset, std::size_t datalen, int segment_index) { + const std::size_t peer = get_peer(segment_index); + DRLOG("calling MPI get(dst:{}, " "segm_offset:{}, size:{}, peer:{})", - dst, offset, datalen, segment_index); + dst, offset, datalen, peer); #if (MPI_VERSION >= 4) || \ (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) // 64-bit API inside - win_.get(dst, datalen, segment_index, offset); + win_.get(dst, datalen, peer, offset); #else for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { std::size_t s = std::min(remainder, (std::size_t)INT_MAX); DRLOG("{}:{} win_.get total {} now {} bytes at off {}, dst offset {}", default_comm().rank(), __LINE__, datalen, s, off, offset + off); - win_.get((uint8_t *)dst + off, s, segment_index, offset + off); + win_.get((uint8_t *)dst + off, s, peer, offset + off); off += s; remainder -= s; } @@ -58,20 +60,22 @@ class DualMpiBackend { void putmem(void const *src, std::size_t offset, std::size_t datalen, int segment_index) { + const std::size_t peer = get_peer(segment_index); + DRLOG("calling MPI put(segm_offset:{}, " "src:{}, size:{}, peer:{})", - offset, src, datalen, segment_index); + offset, src, datalen, 
peer); #if (MPI_VERSION >= 4) || \ (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) // 64-bit API inside - win_.put(src, datalen, segment_index, offset); + win_.put(src, datalen, peer, offset); #else for (std::size_t remainder = datalen, off = 0UL; remainder > 0;) { std::size_t s = std::min(remainder, (std::size_t)INT_MAX); DRLOG("{}:{} win_.put {} bytes at off {}, dst offset {}", default_comm().rank(), __LINE__, s, off, offset + off); - win_.put((uint8_t *)src + off, s, segment_index, offset + off); + win_.put((uint8_t *)src + off, s, peer, offset + off); off += s; remainder -= s; } @@ -81,6 +85,12 @@ class DualMpiBackend { std::size_t getrank() { return win_.communicator().rank(); } void fence() { win_.fence(); } + +private: + std::size_t get_peer(const std::size_t segment_index) { + const auto size = win_.communicator().size(); + return segment_index < size ? segment_index : 2 * size - segment_index - 1; + } }; /// distributed vector @@ -236,12 +246,12 @@ class dual_distributed_vector { backend_type& backend(const std::size_t segment_index) { auto comm_size = default_comm().size(); std::cout << "[backend] segment_index, comm_size: " << segment_index << ", " << comm_size << "\n"; - return backends_[segment_index < comm_size ? 0 : 1]; + return backends_[segment_index < comm_size ? 0 : 1]; } const backend_type& backend(const std::size_t segment_index) const { auto comm_size = default_comm().size(); std::cout << "[backend] segment_index, comm_size: " << segment_index << ", " << comm_size << "\n"; - return backends_[segment_index < comm_size ? 0 : 1]; + return backends_[segment_index < comm_size ? 
0 : 1]; } private: From 46f6adea7a810cc065b308369126aea911d1ac99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 1 Jan 2025 23:53:55 +0100 Subject: [PATCH 038/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 80280c504d..ab1ff57942 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -290,6 +290,9 @@ class dual_distributed_vector { std::size_t segment_index = 0; // bool first_half = true; for (std::size_t i = 0; i < size; i += segment_size_) { + std::cout << i << ": segments_.emplace_back(" << segment_index + << ", " << std::min(segment_size_, size - i) + << ", " << data_size_ << ")\n"; segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); From e4eafa208071a095c7860c3ad39434c0f4613319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 1 Jan 2025 23:58:26 +0100 Subject: [PATCH 039/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index ab1ff57942..2f9adb39e8 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -287,10 +287,11 @@ class dual_distributed_vector { halo_ = new cyclic_span_halo(halos_); + std::cout << "entering loop, segment_size_ = " << segment_size_ << "\n"; std::size_t segment_index = 0; // bool first_half = true; for (std::size_t i = 0; i < size; i += segment_size_) { - std::cout << i << ": segments_.emplace_back(" << segment_index + std::cout << "\t" << i << ": segments_.emplace_back(" << segment_index << ", " << 
std::min(segment_size_, size - i) << ", " << data_size_ << ")\n"; segments_.emplace_back(this, segment_index++, From ad460d8f370abb628fe4420cce2550ce725c189e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 00:25:19 +0100 Subject: [PATCH 040/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 2f9adb39e8..3aefa5bcbc 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -268,11 +268,19 @@ class dual_distributed_vector { assert(hb.prev % gran == 0 && "size must be a multiple of the granularity"); assert(hb.next % gran == 0 && "size must be a multiple of the granularity"); + std::size_t segment_count = comm_size * DUAL_SEGMENTS_PER_PROC; auto proc_segments_size = gran * std::max({ - (size / gran + comm_size - 1) / comm_size, + (size / gran + segment_count - 1) / segment_count, hb.prev / gran, hb.next / gran}); - segment_size_ = proc_segments_size / DUAL_SEGMENTS_PER_PROC; + segment_size_ = proc_segments_size; + + std::cout << "init: segment_count = " << segment_count << "\n"; + + std::size_t actual_segment_count_ = + size_ / segment_size_ + (size_ % segment_size_ == 0 ? 
0 : 1); + assert(actual_segment_count_ <= segment_count + && "there must be at most 2 segments per process"); data_size_ = segment_size_ + hb.prev + hb.next; From bd1e8edd64264d767135132e1575324f201c449e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 00:35:55 +0100 Subject: [PATCH 041/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 3aefa5bcbc..b7984072ce 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -245,12 +245,12 @@ class dual_distributed_vector { backend_type& backend(const std::size_t segment_index) { auto comm_size = default_comm().size(); - std::cout << "[backend] segment_index, comm_size: " << segment_index << ", " << comm_size << "\n"; + std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; return backends_[segment_index < comm_size ? 0 : 1]; } const backend_type& backend(const std::size_t segment_index) const { auto comm_size = default_comm().size(); - std::cout << "[backend] segment_index, comm_size: " << segment_index << ", " << comm_size << "\n"; + std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; return backends_[segment_index < comm_size ? 
0 : 1]; } From 8d0f5be42359029e311231a6af90053048aa05d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 00:41:33 +0100 Subject: [PATCH 042/101] prog --- test/gtest/mp/dual_distributed_vector.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp index 7930eb882a..10c646f81b 100644 --- a/test/gtest/mp/dual_distributed_vector.cpp +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -87,6 +87,10 @@ TEST(MpTests, DualDistributedVectorReference) { } dr::mp::fence(); + std::vector ref(n); + std::iota(ref.begin(), ref.end(), 100); + EXPECT_TRUE(equal_gtest(dv, ref)); + const DV &cdv = dv; if (comm_rank == 0) { common_operations(cdv); From 046b7e41fd57397f6e80578d9647d3f3f63ffdde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 00:49:38 +0100 Subject: [PATCH 043/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 2 +- test/gtest/mp/dual_distributed_vector.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index b7984072ce..e0014a29b2 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -227,7 +227,7 @@ class dual_distributed_vector { delete halo_; } - /// Returns iterator to beginning= + /// Returns iterator to beginning auto begin() const { return iterator(this, 0); } /// Returns iterator to end auto end() const { return begin() + size_; } diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp index 10c646f81b..777543ca6c 100644 --- a/test/gtest/mp/dual_distributed_vector.cpp +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -87,9 +87,11 @@ TEST(MpTests, DualDistributedVectorReference) { } dr::mp::fence(); - std::vector ref(n); - 
std::iota(ref.begin(), ref.end(), 100); - EXPECT_TRUE(equal_gtest(dv, ref)); + std::cout << "printing the vec\n\t[" << dv[0]; + for (std::size_t i = 1; i < n; i++) { + std::cout << ", " << dv[i]; + } + std::cout << "]\n"; const DV &cdv = dv; if (comm_rank == 0) { From 5c32e9e185a57b974f1ade2a19fdaede8e43b83d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 00:55:43 +0100 Subject: [PATCH 044/101] prog --- test/gtest/mp/dual_distributed_vector.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp index 777543ca6c..aa9759154b 100644 --- a/test/gtest/mp/dual_distributed_vector.cpp +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -93,6 +93,12 @@ TEST(MpTests, DualDistributedVectorReference) { } std::cout << "]\n"; + std::cout << "printing the vec iteratively\n\t[" << dv[0]; + for (auto iter = dv.begin(); iter != dv.end(); iter++) { + std::cout << ", " << *iter; + } + std::cout << "]\n"; + const DV &cdv = dv; if (comm_rank == 0) { common_operations(cdv); From 6e95e27c308b73e11505507518995a1b3575b244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 01:00:58 +0100 Subject: [PATCH 045/101] prog --- test/gtest/mp/dual_distributed_vector.cpp | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp index aa9759154b..10c7fa009c 100644 --- a/test/gtest/mp/dual_distributed_vector.cpp +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -71,12 +71,12 @@ int aa; // Operations on a const distributed_vector void common_operations(auto &dv) { - aa = dv[1]; - EXPECT_EQ(dv[1], 101); - EXPECT_EQ(*(&(dv[1])), 101); + aa = dv[2]; + EXPECT_EQ(dv[2], 102); + EXPECT_EQ(*(&(dv[2])), 102); - auto p = &dv[1]; - EXPECT_EQ(*(p + 1), 102); + auto p = &dv[2]; + EXPECT_EQ(*(p + 2), 103); } TEST(MpTests, 
DualDistributedVectorReference) { @@ -87,18 +87,6 @@ TEST(MpTests, DualDistributedVectorReference) { } dr::mp::fence(); - std::cout << "printing the vec\n\t[" << dv[0]; - for (std::size_t i = 1; i < n; i++) { - std::cout << ", " << dv[i]; - } - std::cout << "]\n"; - - std::cout << "printing the vec iteratively\n\t[" << dv[0]; - for (auto iter = dv.begin(); iter != dv.end(); iter++) { - std::cout << ", " << *iter; - } - std::cout << "]\n"; - const DV &cdv = dv; if (comm_rank == 0) { common_operations(cdv); From 46dff8ecc1b40092dcc2ef236613c4278c0bbfbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 01:03:28 +0100 Subject: [PATCH 046/101] prog --- test/gtest/mp/dual_distributed_vector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp index 10c7fa009c..19a8456b84 100644 --- a/test/gtest/mp/dual_distributed_vector.cpp +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -76,7 +76,7 @@ void common_operations(auto &dv) { EXPECT_EQ(*(&(dv[2])), 102); auto p = &dv[2]; - EXPECT_EQ(*(p + 2), 103); + EXPECT_EQ(*(p + 1), 103); } TEST(MpTests, DualDistributedVectorReference) { From b842cc7035f45765bd6664937507c0c812e119dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 01:05:28 +0100 Subject: [PATCH 047/101] prog --- test/gtest/mp/dual_distributed_vector.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/gtest/mp/dual_distributed_vector.cpp b/test/gtest/mp/dual_distributed_vector.cpp index 19a8456b84..d31ab5c07a 100644 --- a/test/gtest/mp/dual_distributed_vector.cpp +++ b/test/gtest/mp/dual_distributed_vector.cpp @@ -71,12 +71,12 @@ int aa; // Operations on a const distributed_vector void common_operations(auto &dv) { - aa = dv[2]; - EXPECT_EQ(dv[2], 102); - EXPECT_EQ(*(&(dv[2])), 102); + aa = dv[0]; + EXPECT_EQ(dv[0], 100); + EXPECT_EQ(*(&(dv[0])), 100); 
- auto p = &dv[2]; - EXPECT_EQ(*(p + 1), 103); + auto p = &dv[0]; + EXPECT_EQ(*(p + 1), 101); } TEST(MpTests, DualDistributedVectorReference) { From d43b9d35e24917f1d0f0b4ae718410a04ea73d7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 18:07:53 +0100 Subject: [PATCH 048/101] prog --- .../mp/containers/dual_distributed_vector.hpp | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index e0014a29b2..c70397b16f 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -245,12 +245,12 @@ class dual_distributed_vector { backend_type& backend(const std::size_t segment_index) { auto comm_size = default_comm().size(); - std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; + // std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; return backends_[segment_index < comm_size ? 0 : 1]; } const backend_type& backend(const std::size_t segment_index) const { auto comm_size = default_comm().size(); - std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; + // std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; return backends_[segment_index < comm_size ? 0 : 1]; } @@ -275,7 +275,7 @@ class dual_distributed_vector { hb.next / gran}); segment_size_ = proc_segments_size; - std::cout << "init: segment_count = " << segment_count << "\n"; + // std::cout << "init: segment_count = " << segment_count << "\n"; std::size_t actual_segment_count_ = size_ / segment_size_ + (size_ % segment_size_ == 0 ? 
0 : 1); @@ -295,34 +295,28 @@ class dual_distributed_vector { halo_ = new cyclic_span_halo(halos_); - std::cout << "entering loop, segment_size_ = " << segment_size_ << "\n"; + // std::cout << "entering loop, segment_size_ = " << segment_size_ << "\n"; std::size_t segment_index = 0; - // bool first_half = true; for (std::size_t i = 0; i < size; i += segment_size_) { - std::cout << "\t" << i << ": segments_.emplace_back(" << segment_index - << ", " << std::min(segment_size_, size - i) - << ", " << data_size_ << ")\n"; + // std::cout << "\t" << i << ": segments_.emplace_back(" << segment_index + // << ", " << std::min(segment_size_, size - i) + // << ", " << data_size_ << ")\n"; segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); - - // if (first_half) { - // if (segment_index < comm_size - 1) { - // segment_index++; - // } else { - // first_half = false; - // } - // } else { - // segment_index--; - // } } - for (auto& s: segments_) { - if (s.is_local()) { - s.swap_state(); - break; - } + for (size_t i = 0; i < default_comm().size(); i++) { + segments_[i].swap_state(); } + // segments_[default_comm().rank()].swap_state(); + // for (auto& s: segments_) { + // if (s.is_local()) { + // s.swap_state(); + // break; + // } + // } + for (size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { fence(i); } From 1b6b21b583318571b02532110254876b1c0de07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 19:51:51 +0100 Subject: [PATCH 049/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 8 -------- test/gtest/mp/xp-tests.hpp | 10 +++++++--- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index c70397b16f..56be62fa10 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -309,14 +309,6 @@ class 
dual_distributed_vector { segments_[i].swap_state(); } - // segments_[default_comm().rank()].swap_state(); - // for (auto& s: segments_) { - // if (s.is_local()) { - // s.swap_state(); - // break; - // } - // } - for (size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { fence(i); } diff --git a/test/gtest/mp/xp-tests.hpp b/test/gtest/mp/xp-tests.hpp index d4c14fc0e5..ea267fea54 100644 --- a/test/gtest/mp/xp-tests.hpp +++ b/test/gtest/mp/xp-tests.hpp @@ -44,11 +44,15 @@ using AllTypes = using IshmemTypes = ::testing::Types>; #else -using AllTypes = ::testing::Types>; -using IshmemTypes = ::testing::Types>; +using AllTypes = ::testing::Types, + dr::mp::dual_distributed_vector>; +using IshmemTypes = ::testing::Types, + dr::mp::dual_distributed_vector>; #endif -using AllTypesWithoutIshmem = ::testing::Types>; +using AllTypesWithoutIshmem = + ::testing::Types, + dr::mp::dual_distributed_vector>; namespace dr::mp { From d333564c11878e500ebbaa7bea24a3e28b3a01c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 19:56:30 +0100 Subject: [PATCH 050/101] prog --- test/gtest/mp/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index 7ff7cf9536..dfc5547175 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -59,7 +59,8 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - dual_distributed_vector.cpp + ../common/for_each.cpp + # dual_distributed_vector.cpp ) # cmake-format: on From 8690c17f060c0e9583dedca0602a2dde31124646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 20:04:28 +0100 Subject: [PATCH 051/101] prog --- .../dr/mp/containers/distributed_vector.hpp | 6 +++--- .../mp/containers/dual_distributed_vector.hpp | 12 ++++++------ include/dr/mp/containers/segment.hpp | 19 ++++++++++++------- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git 
a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 84590ca3f5..9025c43938 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -276,13 +276,13 @@ template class distributed_vector { void fence() { backend_.fence(); } - backend_type& backend(const std::size_t segment_index) { - return backend_; - } + backend_type& backend(const std::size_t segment_index) { return backend_; } const backend_type& backend(const std::size_t segment_index) const { return backend_; } + T *data(const std::size_t segment_index) { return data_; } + private: void init(auto size, auto dist) { size_ = size; diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 56be62fa10..11335f6565 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -244,14 +244,14 @@ class dual_distributed_vector { void fence(const std::size_t i) { backends_[i].fence(); } backend_type& backend(const std::size_t segment_index) { - auto comm_size = default_comm().size(); - // std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; - return backends_[segment_index < comm_size ? 0 : 1]; + return backends_[segment_index < default_comm().size() ? 0 : 1]; } const backend_type& backend(const std::size_t segment_index) const { - auto comm_size = default_comm().size(); - // std::cout << "backend(" << segment_index << ") -> " << (segment_index < comm_size ? 0 : 1) << "\n"; - return backends_[segment_index < comm_size ? 0 : 1]; + return backends_[segment_index < default_comm().size() ? 0 : 1]; + } + + T *data(const std::size_t segment_index) { + return datas_[segment_index < default_comm().size() ? 
0 : 1]; } private: diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 6511c304a8..bb3d5c8fb3 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -127,7 +127,7 @@ template class dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - dv_->backend(segment_index_).getmem(dst, segment_offset * sizeof(value_type), + backend().getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -143,7 +143,7 @@ template class dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), + backend().putmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -158,10 +158,10 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = dv_->backend(segment_index_).getrank(); + const auto my_process_segment_index = backend().getrank(); if (my_process_segment_index == segment_index_) - return dv_->data_ + index_ + dv_->distribution_.halo().prev; + return data() + index_ + dv_->distribution_.halo().prev; #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif @@ -171,7 +171,7 @@ template class dv_segment_iterator { assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case #endif - return dv_->data_ + dv_->distribution_.halo().prev + index_ + + return data() + dv_->distribution_.halo().prev + index_ + dv_->segment_size_; } @@ -179,16 +179,21 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= 
dv_->distribution_.halo().prev); #endif - return dv_->data_ + dv_->distribution_.halo().prev + index_ - + return data() + dv_->distribution_.halo().prev + index_ - dv_->segment_size_; } #ifndef SYCL_LANGUAGE_VERSION assert(false); // trying to read non-owned memory #endif - return static_castdata_)>(nullptr); + return static_cast(nullptr); } + auto backend() { return dv_->backend(segment_index_); } + const auto backend() const { return dv_->backend(segment_index_); } + + auto data() { return dv_->data(segment_index_); } + auto segments() const { assert(dv_ != nullptr); return dr::__detail::drop_segments(dv_->segments(), segment_index_, index_); From 38609474160a87749679d6f21e6b5ca159cfa3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 20:07:01 +0100 Subject: [PATCH 052/101] prog --- include/dr/mp/containers/segment.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index bb3d5c8fb3..7a272afb8a 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -144,7 +144,7 @@ template class dv_segment_iterator { dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); backend().putmem(dst, segment_offset * sizeof(value_type), - size * sizeof(value_type), segment_index_); + size * sizeof(value_type), segment_index_); } void put(const value_type &value) const { put(&value, 1); } @@ -189,10 +189,10 @@ template class dv_segment_iterator { return static_cast(nullptr); } - auto backend() { return dv_->backend(segment_index_); } - const auto backend() const { return dv_->backend(segment_index_); } + auto& backend() { return dv_->backend(segment_index_); } + const auto& backend() const { return dv_->backend(segment_index_); } - auto data() { return dv_->data(segment_index_); } + auto* data() { return dv_->data(segment_index_); } auto segments() const { assert(dv_ != nullptr); 
From 6dcd2f4cdb49b9de91d82ef07bd1be20000a58f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 20:08:50 +0100 Subject: [PATCH 053/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 6 +++--- include/dr/mp/containers/dual_distributed_vector.hpp | 6 +++--- include/dr/mp/containers/segment.hpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 9025c43938..373866fb42 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -277,9 +277,9 @@ template class distributed_vector { void fence() { backend_.fence(); } backend_type& backend(const std::size_t segment_index) { return backend_; } - const backend_type& backend(const std::size_t segment_index) const { - return backend_; - } + // const backend_type& backend(const std::size_t segment_index) const { + // return backend_; + // } T *data(const std::size_t segment_index) { return data_; } diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 11335f6565..2aa0530fd9 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -246,9 +246,9 @@ class dual_distributed_vector { backend_type& backend(const std::size_t segment_index) { return backends_[segment_index < default_comm().size() ? 0 : 1]; } - const backend_type& backend(const std::size_t segment_index) const { - return backends_[segment_index < default_comm().size() ? 0 : 1]; - } + // const backend_type& backend(const std::size_t segment_index) const { + // return backends_[segment_index < default_comm().size() ? 0 : 1]; + // } T *data(const std::size_t segment_index) { return datas_[segment_index < default_comm().size() ? 
0 : 1]; diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 7a272afb8a..043c53d8f3 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -190,7 +190,7 @@ template class dv_segment_iterator { } auto& backend() { return dv_->backend(segment_index_); } - const auto& backend() const { return dv_->backend(segment_index_); } + // const auto& backend() const { return dv_->backend(segment_index_); } auto* data() { return dv_->data(segment_index_); } From df559779246be02fd4eb87c50603457886e454cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 20:10:54 +0100 Subject: [PATCH 054/101] prog --- include/dr/mp/containers/segment.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 043c53d8f3..7c9cd31f27 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -189,7 +189,7 @@ template class dv_segment_iterator { return static_cast(nullptr); } - auto& backend() { return dv_->backend(segment_index_); } + auto backend() { return dv_->backend(segment_index_); } // const auto& backend() const { return dv_->backend(segment_index_); } auto* data() { return dv_->data(segment_index_); } From 4b0f293bc10be4fee9d746693909884e9f7db7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 20:12:17 +0100 Subject: [PATCH 055/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 6 +++--- include/dr/mp/containers/dual_distributed_vector.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 373866fb42..9025c43938 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -277,9 +277,9 @@ template class 
distributed_vector { void fence() { backend_.fence(); } backend_type& backend(const std::size_t segment_index) { return backend_; } - // const backend_type& backend(const std::size_t segment_index) const { - // return backend_; - // } + const backend_type& backend(const std::size_t segment_index) const { + return backend_; + } T *data(const std::size_t segment_index) { return data_; } diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 2aa0530fd9..11335f6565 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -246,9 +246,9 @@ class dual_distributed_vector { backend_type& backend(const std::size_t segment_index) { return backends_[segment_index < default_comm().size() ? 0 : 1]; } - // const backend_type& backend(const std::size_t segment_index) const { - // return backends_[segment_index < default_comm().size() ? 0 : 1]; - // } + const backend_type& backend(const std::size_t segment_index) const { + return backends_[segment_index < default_comm().size() ? 0 : 1]; + } T *data(const std::size_t segment_index) { return datas_[segment_index < default_comm().size() ? 
0 : 1]; From 0fe709c2f9abc02d8b163190969ffe3d9f28251e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 2 Jan 2025 20:14:52 +0100 Subject: [PATCH 056/101] prog --- include/dr/mp/containers/segment.hpp | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 7c9cd31f27..13d36a5caf 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -127,7 +127,7 @@ template class dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - backend().getmem(dst, segment_offset * sizeof(value_type), + dv_->backend(segment_index_).getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -143,8 +143,8 @@ template class dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - backend().putmem(dst, segment_offset * sizeof(value_type), - size * sizeof(value_type), segment_index_); + dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), + size * sizeof(value_type), segment_index_); } void put(const value_type &value) const { put(&value, 1); } @@ -158,10 +158,10 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = backend().getrank(); + const auto my_process_segment_index = dv_->backend(segment_index_).getrank(); if (my_process_segment_index == segment_index_) - return data() + index_ + dv_->distribution_.halo().prev; + return dv_->data(segment_index_) + index_ + dv_->distribution_.halo().prev; #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif @@ -171,29 +171,24 @@ template 
class dv_segment_iterator { assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case #endif - return data() + dv_->distribution_.halo().prev + index_ + - dv_->segment_size_; + return dv_->data(segment_index_) + dv_->distribution_.halo().prev + + index_ + dv_->segment_size_; } if (my_process_segment_index == segment_index_ + 1) { #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif - return data() + dv_->distribution_.halo().prev + index_ - - dv_->segment_size_; + return dv_->data(segment_index_) + dv_->distribution_.halo().prev + + index_ - dv_->segment_size_; } #ifndef SYCL_LANGUAGE_VERSION assert(false); // trying to read non-owned memory #endif - return static_cast(nullptr); + return static_castdata(segment_index_))>(nullptr); } - auto backend() { return dv_->backend(segment_index_); } - // const auto& backend() const { return dv_->backend(segment_index_); } - - auto* data() { return dv_->data(segment_index_); } - auto segments() const { assert(dv_ != nullptr); return dr::__detail::drop_segments(dv_->segments(), segment_index_, index_); From 05fa8f630d4119534d2d08e7823ab8a6640f0359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 7 Jan 2025 12:41:54 +0100 Subject: [PATCH 057/101] prog --- include/dr/mp/algorithms/for_each.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 80c1cbeb43..e98e656a59 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -20,7 +20,7 @@ namespace dr::mp { template concept dual_vector_range = - dr::distributed_range && requires(R &r) { local_segments(r)[0].is_compute(); }; + dr::distributed_range && requires(R &r) { dr::ranges::segments(r)[0].is_compute(); }; void for_each(dual_vector_range auto &&dr, auto op) { assert(false); From 0d94948cf6789c023399653bcb60124047a8ae3f Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 7 Jan 2025 13:22:20 +0100 Subject: [PATCH 058/101] prog --- include/dr/mp/algorithms/for_each.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index e98e656a59..89dc3f986a 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -31,7 +31,7 @@ void for_each(dual_vector_range auto &&dr, auto op) { } assert(aligned(dr)); - for (auto &s : local_segments(dr)) { + for (auto &s : dr::ranges::segments(r)) { if (!s.is_compute()) { s.swap_state(); continue; From 33a1d4f72b594feed2dd47b825e71d5ccc70e7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 7 Jan 2025 13:23:07 +0100 Subject: [PATCH 059/101] prog --- include/dr/mp/algorithms/for_each.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 89dc3f986a..72b92867fc 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -31,7 +31,7 @@ void for_each(dual_vector_range auto &&dr, auto op) { } assert(aligned(dr)); - for (auto &s : dr::ranges::segments(r)) { + for (auto &s : dr::ranges::segments(dr)) { if (!s.is_compute()) { s.swap_state(); continue; From 8dd8a0052e674cd422623de8c4278eea240d70fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 7 Jan 2025 13:25:55 +0100 Subject: [PATCH 060/101] prog --- include/dr/mp/containers/dual_distributed_vector.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 11335f6565..ff56a4ceaf 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -240,6 +240,7 @@ class dual_distributed_vector { 
auto &halo() const { return *halo_; } auto segments() const { return rng::views::all(segments_); } + auto segments() { return rng::views::all(segments_); } void fence(const std::size_t i) { backends_[i].fence(); } From d04461d940effed936f8efca7883c65cf2032f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 13 Jan 2025 19:47:13 +0100 Subject: [PATCH 061/101] prog --- include/dr/mp/algorithms/for_each.hpp | 8 ++++++-- include/dr/mp/containers/dual_distributed_vector.hpp | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 72b92867fc..404813dd72 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -23,12 +23,16 @@ concept dual_vector_range = dr::distributed_range && requires(R &r) { dr::ranges::segments(r)[0].is_compute(); }; void for_each(dual_vector_range auto &&dr, auto op) { - assert(false); + partial_for_each(dr, op); + partial_for_each(dr, op); +} - dr::drlog.debug(dr::logger::for_each, "for_each: parallel execution\n"); +void partial_for_each(dual_vector_range auto &&dr, auto op) { + dr::drlog.debug(dr::logger::for_each, "partial_for_each: parallel execution\n"); if (rng::empty(dr)) { return; } + assert(aligned(dr)); for (auto &s : dr::ranges::segments(dr)) { diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index ff56a4ceaf..878262141d 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -330,9 +330,9 @@ class dual_distributed_vector { std::vector backends_{DUAL_SEGMENTS_PER_PROC}; }; -// template -// auto &halo(const dual_distributed_vector &dv) { -// return dv.halo(); -// } +template +auto &halo(const dual_distributed_vector &dv) { + return dv.halo(); +} } // namespace dr::mp From 497eb8c72eaae8fb31db31d5cfc6a55443510b73 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 13 Jan 2025 20:12:29 +0100 Subject: [PATCH 062/101] prog --- test/gtest/mp/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index dfc5547175..3db464fcbc 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -59,7 +59,8 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - ../common/for_each.cpp + halo.cpp + # ../common/for_each.cpp # dual_distributed_vector.cpp ) # cmake-format: on From 29759f156409a68577614ec162071731a555e60d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 21 Jan 2025 15:29:06 +0100 Subject: [PATCH 063/101] prog --- include/dr/mp/algorithms/for_each.hpp | 4 +--- include/dr/mp/containers/dual_distributed_vector.hpp | 7 +------ include/dr/mp/containers/segment.hpp | 2 +- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 404813dd72..0770049246 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -33,9 +33,7 @@ void partial_for_each(dual_vector_range auto &&dr, auto op) { return; } - assert(aligned(dr)); - - for (auto &s : dr::ranges::segments(dr)) { + for (auto &s : local_segments(dr)) { if (!s.is_compute()) { s.swap_state(); continue; diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 878262141d..919c6e7c3b 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -242,6 +242,7 @@ class dual_distributed_vector { auto segments() const { return rng::views::all(segments_); } auto segments() { return rng::views::all(segments_); } + __attribute__((unused)) void fence(const std::size_t i) { backends_[i].fence(); } backend_type& backend(const 
std::size_t segment_index) { @@ -276,8 +277,6 @@ class dual_distributed_vector { hb.next / gran}); segment_size_ = proc_segments_size; - // std::cout << "init: segment_count = " << segment_count << "\n"; - std::size_t actual_segment_count_ = size_ / segment_size_ + (size_ % segment_size_ == 0 ? 0 : 1); assert(actual_segment_count_ <= segment_count @@ -296,12 +295,8 @@ class dual_distributed_vector { halo_ = new cyclic_span_halo(halos_); - // std::cout << "entering loop, segment_size_ = " << segment_size_ << "\n"; std::size_t segment_index = 0; for (std::size_t i = 0; i < size; i += segment_size_) { - // std::cout << "\t" << i << ": segments_.emplace_back(" << segment_index - // << ", " << std::min(segment_size_, size - i) - // << ", " << data_size_ << ")\n"; segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); } diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 13d36a5caf..d89ec73a98 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -151,7 +151,7 @@ template class dv_segment_iterator { auto rank() const { assert(dv_ != nullptr); - return segment_index_; + return dv_->backend(segment_index_).getrank(); } auto local() const { From 73c78a4bf7c86db55bb7dc8cbca9b43f2c12065d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 27 Jan 2025 22:37:48 +0100 Subject: [PATCH 064/101] prog --- test/gtest/mp/halo.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/gtest/mp/halo.cpp b/test/gtest/mp/halo.cpp index 255832a02a..99b00cd402 100644 --- a/test/gtest/mp/halo.cpp +++ b/test/gtest/mp/halo.cpp @@ -12,12 +12,13 @@ template void local_is_accessible_in_halo_region(const int halo_prev, const int halo_next) { - DV dv(6, dr::mp::distribution().halo(halo_prev, halo_next)); + std::cout << "0\n"; + DV dv(6, dr::mp::distribution().halo(halo_prev, halo_next)); std::cout << "1\n"; 
DRLOG("local_is_accessible_in_halo_region TEST START, prev:{}, next:{}", halo_prev, halo_next); - iota(dv, 0); + iota(dv, 0); std::cout << "2\n"; DRLOG("exchange start"); - dv.halo().exchange(); + dv.halo().exchange(); std::cout << "3\n"; // arrays below is function depending on size of communicator-1 std::array first_local_index___; @@ -62,6 +63,7 @@ void local_is_accessible_in_halo_region(const int halo_prev, "c:{}", first_legal_idx, first_illegal_idx, c); + std::cout << "4\n"; for (int idx = first_legal_idx; idx < first_illegal_idx; ++idx) { typename DV::value_type *local_ptr = (dv.begin() + idx).local(); EXPECT_TRUE(local_ptr != nullptr); From 02c92d833a4c37899d5401b807f68013c0d9e76f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 20:38:18 +0100 Subject: [PATCH 065/101] prog --- test/gtest/mp/xp-tests.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/gtest/mp/xp-tests.hpp b/test/gtest/mp/xp-tests.hpp index ea267fea54..f035987c55 100644 --- a/test/gtest/mp/xp-tests.hpp +++ b/test/gtest/mp/xp-tests.hpp @@ -44,15 +44,15 @@ using AllTypes = using IshmemTypes = ::testing::Types>; #else -using AllTypes = ::testing::Types, - dr::mp::dual_distributed_vector>; -using IshmemTypes = ::testing::Types, - dr::mp::dual_distributed_vector>; +using AllTypes = ::testing::Types>; //, + //dr::mp::dual_distributed_vector>; +using IshmemTypes = ::testing::Types>;//, + //dr::mp::dual_distributed_vector>; #endif using AllTypesWithoutIshmem = - ::testing::Types, - dr::mp::dual_distributed_vector>; + ::testing::Types>; //, + //dr::mp::dual_distributed_vector>; namespace dr::mp { From 6d365923d14a08eabe9e3574a6367520662a099d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 20:41:58 +0100 Subject: [PATCH 066/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 9025c43938..175372da23 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -285,6 +285,7 @@ template class distributed_vector { private: void init(auto size, auto dist) { + std::cout << "init 0\n"; size_ = size; distribution_ = dist; @@ -301,18 +302,26 @@ template class distributed_vector { data_size_ = segment_size_ + hb.prev + hb.next; + std::cout << "init 1\n"; + if (size_ > 0) { data_ = static_cast(backend_.allocate(data_size_ * sizeof(T))); } + std::cout << "init 2\n"; + halo_ = new span_halo(default_comm(), data_, data_size_, hb); + std::cout << "init 3\n"; + std::size_t segment_index = 0; for (std::size_t i = 0; i < size; i += segment_size_) { segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); } + std::cout << "init 4\n"; + fence(); } From 5a4830020e17a1667500f3568b9af18747a13567 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 20:45:31 +0100 Subject: [PATCH 067/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 175372da23..bcde233f6e 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -16,11 +16,11 @@ class MpiBackend { public: void *allocate(std::size_t data_size) { - assert(data_size > 0); - void *data = __detail::allocator().allocate(data_size); - DRLOG("called MPI allocate({}) -> got:{}", data_size, data); - win_.create(default_comm(), data, data_size); - active_wins().insert(win_.mpi_win()); + assert(data_size > 0); std::cout << "alloc 0\n"; + void *data = __detail::allocator().allocate(data_size); std::cout << "alloc 1\n"; + DRLOG("called MPI 
allocate({}) -> got:{}", data_size, data); std::cout << "alloc 2\n"; + win_.create(default_comm(), data, data_size); std::cout << "alloc 3\n"; + active_wins().insert(win_.mpi_win()); std::cout << "alloc 4\n"; return data; } From 2955cdc864fdadca31ffa8ae521240b24f928a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 20:48:20 +0100 Subject: [PATCH 068/101] prog --- include/dr/detail/communicator.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/dr/detail/communicator.hpp b/include/dr/detail/communicator.hpp index 331253ab63..4f0f130d0a 100644 --- a/include/dr/detail/communicator.hpp +++ b/include/dr/detail/communicator.hpp @@ -230,10 +230,10 @@ class communicator { class rma_window { public: void create(communicator comm, void *data, std::size_t size) { - local_data_ = data; - communicator_ = comm; + local_data_ = data; std::cout << "create 0\n"; + communicator_ = comm; std::cout << "create 1\n"; DRLOG("win create:: size: {} data:{}", size, data); - MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); + MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); std::cout << "create 2\n"; } template auto local_data() { From d5ad221219272499df2eb510a058208e4da2f4d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 21:06:17 +0100 Subject: [PATCH 069/101] prog --- include/dr/mp/containers/segment.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index d89ec73a98..e44081afa8 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -138,6 +138,7 @@ template class dv_segment_iterator { } void put(const value_type *dst, std::size_t size) const { + std::cout << "put with (size=" << size << " segment_index_=" << segment_index_ << " index_=" << index_ << ")\n"; assert(dv_ != nullptr); assert(segment_index_ * 
dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; From b02102f3d89236bc62e73ca378cbb8d5f78468cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 21:13:42 +0100 Subject: [PATCH 070/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 1 + include/dr/mp/containers/segment.hpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index bcde233f6e..aaf99e9013 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -316,6 +316,7 @@ template class distributed_vector { std::size_t segment_index = 0; for (std::size_t i = 0; i < size; i += segment_size_) { + std::cout << "segments_.emplace_back si=" << segment_index << " size=" << std::min(segment_size_, size - i) << " reserved=" << data_size_ << "\n"; segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); } diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index e44081afa8..bfa756edb3 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -145,7 +145,7 @@ template class dv_segment_iterator { dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), - size * sizeof(value_type), segment_index_); + size * sizeof(value_type), rank()); } void put(const value_type &value) const { put(&value, 1); } From 8d798eb0bc367ec62854ae8abc430e135cd21f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 21:20:38 +0100 Subject: [PATCH 071/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/dr/mp/containers/distributed_vector.hpp 
b/include/dr/mp/containers/distributed_vector.hpp index aaf99e9013..cecfae9bed 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -57,6 +57,8 @@ class MpiBackend { void putmem(void const *src, std::size_t offset, std::size_t datalen, int segment_index) { + + std::cout << "calling MPI put(segm_offset:" << offset << ", src:" << src << ", size:" << datalen << ", peer:" << segment_index << ")"; DRLOG("calling MPI put(segm_offset:{}, " "src:{}, size:{}, peer:{})", offset, src, datalen, segment_index); From d658d77234d9be6937a3ab1c9bc071ef0c137b04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 21:29:08 +0100 Subject: [PATCH 072/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 2 +- test/gtest/mp/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index cecfae9bed..33f9d8240c 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -58,7 +58,7 @@ class MpiBackend { void putmem(void const *src, std::size_t offset, std::size_t datalen, int segment_index) { - std::cout << "calling MPI put(segm_offset:" << offset << ", src:" << src << ", size:" << datalen << ", peer:" << segment_index << ")"; + std::cout << "calling MPI put(segm_offset:" << offset << ", src:" << src << ", size:" << datalen << ", peer:" << segment_index << ")\n"; DRLOG("calling MPI put(segm_offset:{}, " "src:{}, size:{}, peer:{})", offset, src, datalen, segment_index); diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index 3db464fcbc..e1f3ba70f4 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -65,7 +65,7 @@ add_executable(mp-quick-test ) # cmake-format: on -target_compile_definitions(mp-quick-test PRIVATE QUICK_TEST) 
+target_compile_definitions(mp-quick-test PRIVATE QUICK_TEST DR_FORMAT) foreach(test-exec IN ITEMS mp-tests mp-tests-3 mp-quick-test) if(ENABLE_ISHMEM) From 4e33bd04474e89f4404179f854531094779643fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 21:30:05 +0100 Subject: [PATCH 073/101] prog --- include/dr/detail/communicator.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dr/detail/communicator.hpp b/include/dr/detail/communicator.hpp index 4f0f130d0a..2698a7c928 100644 --- a/include/dr/detail/communicator.hpp +++ b/include/dr/detail/communicator.hpp @@ -282,6 +282,7 @@ class rma_window { #if (MPI_VERSION >= 4) || \ (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) + std::cout << "MPI_RPUT_C: rank=" << rank << "\n"; MPI_Rput_c(src, size, MPI_BYTE, rank, disp, size, MPI_BYTE, win_, &request); #else // MPI_Rput origin_count is 32-bit signed int - check range From 6d108c425612c745d48ee6379cf6b78c526d24d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 21:42:54 +0100 Subject: [PATCH 074/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 33f9d8240c..f6a6dbc104 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -25,12 +25,12 @@ class MpiBackend { } void deallocate(void *data, std::size_t data_size) { - assert(data_size > 0); - DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); - active_wins().erase(win_.mpi_win()); - win_.free(); + assert(data_size > 0); std::cout << "dealloc 0\n"; + DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); std::cout << "dealloc 1\n"; + active_wins().erase(win_.mpi_win()); std::cout << "dealloc 2\n"; + win_.free(); std::cout << "dealloc 
3\n"; __detail::allocator().deallocate(static_cast(data), - data_size); + data_size); std::cout << "dealloc 4\n"; } void getmem(void *dst, std::size_t offset, std::size_t datalen, @@ -253,13 +253,18 @@ template class distributed_vector { } ~distributed_vector() { + std::cout << "~distributed_vector 0\n"; if (!finalized()) { fence(); + std::cout << "~distributed_vector 1\n"; if (data_ != nullptr) { backend_.deallocate(data_, data_size_ * sizeof(value_type)); } + std::cout << "~distributed_vector 2\n"; delete halo_; + + std::cout << "~distributed_vector 3\n"; } } From 381159fa04160e4fea54d54194d517e395d5ad4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Sat, 1 Feb 2025 21:58:27 +0100 Subject: [PATCH 075/101] prog --- include/dr/detail/communicator.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dr/detail/communicator.hpp b/include/dr/detail/communicator.hpp index 2698a7c928..d273678a57 100644 --- a/include/dr/detail/communicator.hpp +++ b/include/dr/detail/communicator.hpp @@ -232,7 +232,8 @@ class rma_window { void create(communicator comm, void *data, std::size_t size) { local_data_ = data; std::cout << "create 0\n"; communicator_ = comm; std::cout << "create 1\n"; - DRLOG("win create:: size: {} data:{}", size, data); + DRLOG("win create:: size: {} data:{}", size, data); + std::cout << "MPI_WIN_CREATE " << data << " " << size << " " << &win_ << "\n"; MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); std::cout << "create 2\n"; } From 3d2edda8b81a02a22966e5af0e01374ebc350eb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 10 Feb 2025 22:15:23 +0100 Subject: [PATCH 076/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 4 ++++ test/gtest/mp/halo.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index f6a6dbc104..252f4e3ecf 100644 --- 
a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -21,10 +21,14 @@ class MpiBackend { DRLOG("called MPI allocate({}) -> got:{}", data_size, data); std::cout << "alloc 2\n"; win_.create(default_comm(), data, data_size); std::cout << "alloc 3\n"; active_wins().insert(win_.mpi_win()); std::cout << "alloc 4\n"; + + std::cout << "allocated: " << data << std::endl; return data; } void deallocate(void *data, std::size_t data_size) { + std::cout << "deallocating: " << data << std::endl; + assert(data_size > 0); std::cout << "dealloc 0\n"; DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); std::cout << "dealloc 1\n"; active_wins().erase(win_.mpi_win()); std::cout << "dealloc 2\n"; diff --git a/test/gtest/mp/halo.cpp b/test/gtest/mp/halo.cpp index 99b00cd402..f97bf0f0d3 100644 --- a/test/gtest/mp/halo.cpp +++ b/test/gtest/mp/halo.cpp @@ -79,6 +79,8 @@ void local_is_accessible_in_halo_region(const int halo_prev, } DRLOG("checks ok"); + std::cout << "5\n"; + // although assertions indeed happen, but they are not caught by EXPECT_DEATH // if (first_illegal_idx < 6) { // dr::drlog.debug("checking first illegal idx:{} after legal ones\n", From ccc10336f0e034a69bfe24dca3f506582d9ab621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 10 Feb 2025 22:27:13 +0100 Subject: [PATCH 077/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 252f4e3ecf..0a6c12fa37 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -13,11 +13,12 @@ namespace dr::mp { class MpiBackend { dr::rma_window win_; + __detail::allocator allocator_; public: void *allocate(std::size_t data_size) { assert(data_size > 0); std::cout << "alloc 0\n"; - void *data = 
__detail::allocator().allocate(data_size); std::cout << "alloc 1\n"; + void *data = allocator_.allocate(data_size); std::cout << "alloc 1\n"; DRLOG("called MPI allocate({}) -> got:{}", data_size, data); std::cout << "alloc 2\n"; win_.create(default_comm(), data, data_size); std::cout << "alloc 3\n"; active_wins().insert(win_.mpi_win()); std::cout << "alloc 4\n"; @@ -33,8 +34,7 @@ class MpiBackend { DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); std::cout << "dealloc 1\n"; active_wins().erase(win_.mpi_win()); std::cout << "dealloc 2\n"; win_.free(); std::cout << "dealloc 3\n"; - __detail::allocator().deallocate(static_cast(data), - data_size); std::cout << "dealloc 4\n"; + allocator_.deallocate(static_cast(data), data_size); std::cout << "dealloc 4\n"; } void getmem(void *dst, std::size_t offset, std::size_t datalen, From 3723fef5640169e3613e3435ae3432f169823a91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 10 Feb 2025 22:35:03 +0100 Subject: [PATCH 078/101] prog --- include/dr/mp/allocator.hpp | 3 +++ include/dr/mp/containers/distributed_vector.hpp | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/dr/mp/allocator.hpp b/include/dr/mp/allocator.hpp index 4c40b3ccf2..fadcac2148 100644 --- a/include/dr/mp/allocator.hpp +++ b/include/dr/mp/allocator.hpp @@ -33,6 +33,7 @@ template class allocator { } void deallocate(T *ptr, std::size_t sz) { + std::cout << "deallocate(" << ptr << "," << sz << ")\n"; if (sz == 0) { assert(ptr == nullptr); return; @@ -40,11 +41,13 @@ template class allocator { assert(ptr != nullptr); #ifdef SYCL_LANGUAGE_VERSION if (mp::use_sycl()) { + std::cout << "deallocating with sycl\n"; sycl::free(ptr, sycl_queue()); return; } #endif + std::cout << "deallocating with std\n"; std_allocator_.deallocate(ptr, sz); } diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 0a6c12fa37..252f4e3ecf 100644 --- 
a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -13,12 +13,11 @@ namespace dr::mp { class MpiBackend { dr::rma_window win_; - __detail::allocator allocator_; public: void *allocate(std::size_t data_size) { assert(data_size > 0); std::cout << "alloc 0\n"; - void *data = allocator_.allocate(data_size); std::cout << "alloc 1\n"; + void *data = __detail::allocator().allocate(data_size); std::cout << "alloc 1\n"; DRLOG("called MPI allocate({}) -> got:{}", data_size, data); std::cout << "alloc 2\n"; win_.create(default_comm(), data, data_size); std::cout << "alloc 3\n"; active_wins().insert(win_.mpi_win()); std::cout << "alloc 4\n"; @@ -34,7 +33,8 @@ class MpiBackend { DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); std::cout << "dealloc 1\n"; active_wins().erase(win_.mpi_win()); std::cout << "dealloc 2\n"; win_.free(); std::cout << "dealloc 3\n"; - allocator_.deallocate(static_cast(data), data_size); std::cout << "dealloc 4\n"; + __detail::allocator().deallocate(static_cast(data), + data_size); std::cout << "dealloc 4\n"; } void getmem(void *dst, std::size_t offset, std::size_t datalen, From 6e2f48bf85fd9603fe67f630129807ce3435c6ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 10 Feb 2025 22:55:28 +0100 Subject: [PATCH 079/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 252f4e3ecf..656367ab90 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -33,8 +33,8 @@ class MpiBackend { DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); std::cout << "dealloc 1\n"; active_wins().erase(win_.mpi_win()); std::cout << "dealloc 2\n"; win_.free(); std::cout << "dealloc 3\n"; - 
__detail::allocator().deallocate(static_cast(data), - data_size); std::cout << "dealloc 4\n"; + // __detail::allocator().deallocate(static_cast(data), + // data_size); std::cout << "dealloc 4\n"; } void getmem(void *dst, std::size_t offset, std::size_t datalen, From 741b0d82bc9a98af5838a81d72d7ba59343e1c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 11 Feb 2025 12:05:24 +0100 Subject: [PATCH 080/101] prog --- include/dr/mp/containers/distributed_vector.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 656367ab90..252f4e3ecf 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -33,8 +33,8 @@ class MpiBackend { DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); std::cout << "dealloc 1\n"; active_wins().erase(win_.mpi_win()); std::cout << "dealloc 2\n"; win_.free(); std::cout << "dealloc 3\n"; - // __detail::allocator().deallocate(static_cast(data), - // data_size); std::cout << "dealloc 4\n"; + __detail::allocator().deallocate(static_cast(data), + data_size); std::cout << "dealloc 4\n"; } void getmem(void *dst, std::size_t offset, std::size_t datalen, From a1483a2c6c65138ccdbb40ff8d8562dda64adfe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 18 Feb 2025 22:21:44 +0100 Subject: [PATCH 081/101] prog --- include/dr/detail/communicator.hpp | 8 ++-- include/dr/mp/algorithms/for_each.hpp | 2 + include/dr/mp/allocator.hpp | 3 -- .../dr/mp/containers/distributed_vector.hpp | 41 +++++-------------- .../mp/containers/dual_distributed_vector.hpp | 9 +++- include/dr/mp/containers/dual_segment.hpp | 37 ++++++++++++++++- include/dr/mp/containers/segment.hpp | 11 ++++- include/dr/mp/views/views.hpp | 1 + test/gtest/mp/halo.cpp | 14 +++---- test/gtest/mp/xp-tests.hpp | 12 +++--- 10 files changed, 82 
insertions(+), 56 deletions(-) diff --git a/include/dr/detail/communicator.hpp b/include/dr/detail/communicator.hpp index d273678a57..7d41de05a5 100644 --- a/include/dr/detail/communicator.hpp +++ b/include/dr/detail/communicator.hpp @@ -230,11 +230,10 @@ class communicator { class rma_window { public: void create(communicator comm, void *data, std::size_t size) { - local_data_ = data; std::cout << "create 0\n"; - communicator_ = comm; std::cout << "create 1\n"; + local_data_ = data; + communicator_ = comm; DRLOG("win create:: size: {} data:{}", size, data); - std::cout << "MPI_WIN_CREATE " << data << " " << size << " " << &win_ << "\n"; - MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); std::cout << "create 2\n"; + MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); } template auto local_data() { @@ -283,7 +282,6 @@ class rma_window { #if (MPI_VERSION >= 4) || \ (defined(I_MPI_NUMVERSION) && (I_MPI_NUMVERSION > 20211200000)) - std::cout << "MPI_RPUT_C: rank=" << rank << "\n"; MPI_Rput_c(src, size, MPI_BYTE, rank, disp, size, MPI_BYTE, win_, &request); #else // MPI_Rput origin_count is 32-bit signed int - check range diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 0770049246..4e7ae04332 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -28,6 +28,7 @@ void for_each(dual_vector_range auto &&dr, auto op) { } void partial_for_each(dual_vector_range auto &&dr, auto op) { + std::cout << "partial_for_each()" << std::endl; dr::drlog.debug(dr::logger::for_each, "partial_for_each: parallel execution\n"); if (rng::empty(dr)) { return; @@ -83,6 +84,7 @@ void for_each(dr::distributed_range auto &&dr, auto op) { assert(false); #endif } else { + std::cout << "for_eaching" << std::endl; dr::drlog.debug(" using cpu\n"); rng::for_each(s, op); } diff --git a/include/dr/mp/allocator.hpp b/include/dr/mp/allocator.hpp index fadcac2148..4c40b3ccf2 100644 
--- a/include/dr/mp/allocator.hpp +++ b/include/dr/mp/allocator.hpp @@ -33,7 +33,6 @@ template class allocator { } void deallocate(T *ptr, std::size_t sz) { - std::cout << "deallocate(" << ptr << "," << sz << ")\n"; if (sz == 0) { assert(ptr == nullptr); return; @@ -41,13 +40,11 @@ template class allocator { assert(ptr != nullptr); #ifdef SYCL_LANGUAGE_VERSION if (mp::use_sycl()) { - std::cout << "deallocating with sycl\n"; sycl::free(ptr, sycl_queue()); return; } #endif - std::cout << "deallocating with std\n"; std_allocator_.deallocate(ptr, sz); } diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 252f4e3ecf..22075ec2a5 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -16,25 +16,20 @@ class MpiBackend { public: void *allocate(std::size_t data_size) { - assert(data_size > 0); std::cout << "alloc 0\n"; - void *data = __detail::allocator().allocate(data_size); std::cout << "alloc 1\n"; - DRLOG("called MPI allocate({}) -> got:{}", data_size, data); std::cout << "alloc 2\n"; - win_.create(default_comm(), data, data_size); std::cout << "alloc 3\n"; - active_wins().insert(win_.mpi_win()); std::cout << "alloc 4\n"; - - std::cout << "allocated: " << data << std::endl; + void *data = __detail::allocator().allocate(data_size); + DRLOG("called MPI allocate({}) -> got:{}", data_size, data); + win_.create(default_comm(), data, data_size); + active_wins().insert(win_.mpi_win()); return data; } void deallocate(void *data, std::size_t data_size) { - std::cout << "deallocating: " << data << std::endl; - - assert(data_size > 0); std::cout << "dealloc 0\n"; - DRLOG("calling MPI deallocate ({}, data_size:{})", data, data_size); std::cout << "dealloc 1\n"; - active_wins().erase(win_.mpi_win()); std::cout << "dealloc 2\n"; - win_.free(); std::cout << "dealloc 3\n"; + assert(data_size > 0); + DRLOG("calling MPI deallocate ({}, data_size:{})", data, 
data_size); + active_wins().erase(win_.mpi_win()); + win_.free(); __detail::allocator().deallocate(static_cast(data), - data_size); std::cout << "dealloc 4\n"; + data_size); } void getmem(void *dst, std::size_t offset, std::size_t datalen, @@ -61,8 +56,6 @@ class MpiBackend { void putmem(void const *src, std::size_t offset, std::size_t datalen, int segment_index) { - - std::cout << "calling MPI put(segm_offset:" << offset << ", src:" << src << ", size:" << datalen << ", peer:" << segment_index << ")\n"; DRLOG("calling MPI put(segm_offset:{}, " "src:{}, size:{}, peer:{})", offset, src, datalen, segment_index); @@ -257,18 +250,14 @@ template class distributed_vector { } ~distributed_vector() { - std::cout << "~distributed_vector 0\n"; if (!finalized()) { fence(); - std::cout << "~distributed_vector 1\n"; + if (data_ != nullptr) { backend_.deallocate(data_, data_size_ * sizeof(value_type)); } - std::cout << "~distributed_vector 2\n"; delete halo_; - - std::cout << "~distributed_vector 3\n"; } } @@ -296,7 +285,6 @@ template class distributed_vector { private: void init(auto size, auto dist) { - std::cout << "init 0\n"; size_ = size; distribution_ = dist; @@ -313,27 +301,18 @@ template class distributed_vector { data_size_ = segment_size_ + hb.prev + hb.next; - std::cout << "init 1\n"; - if (size_ > 0) { data_ = static_cast(backend_.allocate(data_size_ * sizeof(T))); } - std::cout << "init 2\n"; - halo_ = new span_halo(default_comm(), data_, data_size_, hb); - std::cout << "init 3\n"; - std::size_t segment_index = 0; for (std::size_t i = 0; i < size; i += segment_size_) { - std::cout << "segments_.emplace_back si=" << segment_index << " size=" << std::min(segment_size_, size - i) << " reserved=" << data_size_ << "\n"; segments_.emplace_back(this, segment_index++, std::min(segment_size_, size - i), data_size_); } - std::cout << "init 4\n"; - fence(); } diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp 
index 919c6e7c3b..49116608dc 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -240,7 +240,7 @@ class dual_distributed_vector { auto &halo() const { return *halo_; } auto segments() const { return rng::views::all(segments_); } - auto segments() { return rng::views::all(segments_); } + // auto segments() { return rng::views::all(segments_); } __attribute__((unused)) void fence(const std::size_t i) { backends_[i].fence(); } @@ -284,6 +284,12 @@ class dual_distributed_vector { data_size_ = segment_size_ + hb.prev + hb.next; + std::cout << "creating dual_distributed vector\n" + << "\tsize: " << size << "\n" + << "\tsegment_size_: " << segment_size_ << "\n" + << "\tactual_segment_count_: " << actual_segment_count_ << "\n" + << "\tdata_size_: " << data_size_ << "\n"; + for (std::size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { if (size_ > 0) { datas_.push_back(static_cast( @@ -311,6 +317,7 @@ class dual_distributed_vector { } friend dv_segment_iterator; + // friend dual_dv_segment_iterator; std::size_t segment_size_ = 0; std::size_t data_size_ = 0; // size + halo diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index d94b7ccf9b..37bdfe2745 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -8,7 +8,26 @@ namespace dr::mp { -template +// template +// class dual_dv_segment_iterator : public dv_segment_iterator { +// public: +// dual_dv_segment_iterator() = default; +// dual_dv_segment_iterator(DV *dv, std::size_t segment_index, std::size_t index) +// : dv_segment_iterator(dv, segment_index, index) { +// } + +// auto rank() const { +// std::cout << "rank(): segment_index_ == " << this->segment_index_ << "\n"; + +// if (this->segment_index_ < default_comm().size()) { +// return this->segment_index_; +// } + +// return 2 * default_comm().size() - this->segment_index_ - 1; +// } +// }; + +template 
class dual_dv_segment : public dv_segment { private: using iterator = dv_segment_iterator; @@ -21,6 +40,22 @@ class dual_dv_segment : public dv_segment { : dv_segment(dv, segment_index, size, reserved) { } + // auto size() const { + // assert(this->dv_ != nullptr); + // return this->size_; + // } + + // auto begin() const { return iterator(this->dv_, this->segment_index_, 0); } + // auto end() const { return begin() + size(); } + // auto reserved() const { return this->reserved_; } + + // auto operator[](difference_type n) const { return *(begin() + n); } + + bool is_local() const { + return this->segment_index_ == default_comm().rank() + || this->segment_index_ == 2 * default_comm().size() - default_comm().rank() - 1; + } + bool is_compute() const { return _is_compute; } void swap_state() { _is_compute = !_is_compute; } diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index bfa756edb3..a22ca661f2 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -152,7 +152,14 @@ template class dv_segment_iterator { auto rank() const { assert(dv_ != nullptr); - return dv_->backend(segment_index_).getrank(); + + std::cout << "rank(): segment_index_ == " << this->segment_index_ << "\n"; + + if (this->segment_index_ < default_comm().size()) { + return this->segment_index_; + } + + return 2 * default_comm().size() - this->segment_index_ - 1; } auto local() const { @@ -241,7 +248,7 @@ template class dv_segment { bool is_local() const { return segment_index_ == default_comm().rank(); } -private: +protected: DV *dv_ = nullptr; std::size_t segment_index_; std::size_t size_; diff --git a/include/dr/mp/views/views.hpp b/include/dr/mp/views/views.hpp index 44310a7f75..a31142548d 100644 --- a/include/dr/mp/views/views.hpp +++ b/include/dr/mp/views/views.hpp @@ -14,6 +14,7 @@ namespace dr::mp { // segment to local template auto local_segments(R &&dr) { auto is_local = [](const auto &segment) { + std::cout << 
"local_segments::is_local: [seg rank] " << dr::ranges::rank(segment) << " == " << default_comm().rank() << " [comm rank]" << std::endl; return dr::ranges::rank(segment) == default_comm().rank(); }; // Convert from remote iter to local iter diff --git a/test/gtest/mp/halo.cpp b/test/gtest/mp/halo.cpp index f97bf0f0d3..6d4394ed96 100644 --- a/test/gtest/mp/halo.cpp +++ b/test/gtest/mp/halo.cpp @@ -12,13 +12,13 @@ template void local_is_accessible_in_halo_region(const int halo_prev, const int halo_next) { - std::cout << "0\n"; - DV dv(6, dr::mp::distribution().halo(halo_prev, halo_next)); std::cout << "1\n"; + DV dv(6, dr::mp::distribution().halo(halo_prev, halo_next)); DRLOG("local_is_accessible_in_halo_region TEST START, prev:{}, next:{}", halo_prev, halo_next); - iota(dv, 0); std::cout << "2\n"; + iota(dv, 0); DRLOG("exchange start"); - dv.halo().exchange(); std::cout << "3\n"; + + dv.halo().exchange(); // arrays below is function depending on size of communicator-1 std::array first_local_index___; @@ -59,11 +59,13 @@ void local_is_accessible_in_halo_region(const int halo_prev, auto first_legal_idx = std::max(0, first_local_index___[c] - halo_prev); auto first_illegal_idx = std::min(6, first_nonlocal_index[c] + halo_next); + std::cout << "first_legal_idx: " << first_legal_idx << "\n"; + std::cout << "first_illegal_idx: " << first_illegal_idx << "\n"; + DRLOG("checking access to idx between first legal {} and first illegal {}, " "c:{}", first_legal_idx, first_illegal_idx, c); - std::cout << "4\n"; for (int idx = first_legal_idx; idx < first_illegal_idx; ++idx) { typename DV::value_type *local_ptr = (dv.begin() + idx).local(); EXPECT_TRUE(local_ptr != nullptr); @@ -79,8 +81,6 @@ void local_is_accessible_in_halo_region(const int halo_prev, } DRLOG("checks ok"); - std::cout << "5\n"; - // although assertions indeed happen, but they are not caught by EXPECT_DEATH // if (first_illegal_idx < 6) { // dr::drlog.debug("checking first illegal idx:{} after legal ones\n", diff 
--git a/test/gtest/mp/xp-tests.hpp b/test/gtest/mp/xp-tests.hpp index f035987c55..d24b02ef2e 100644 --- a/test/gtest/mp/xp-tests.hpp +++ b/test/gtest/mp/xp-tests.hpp @@ -44,15 +44,15 @@ using AllTypes = using IshmemTypes = ::testing::Types>; #else -using AllTypes = ::testing::Types>; //, - //dr::mp::dual_distributed_vector>; -using IshmemTypes = ::testing::Types>;//, - //dr::mp::dual_distributed_vector>; +using AllTypes = ::testing::Types, + dr::mp::dual_distributed_vector>; +using IshmemTypes = ::testing::Types, + dr::mp::dual_distributed_vector>; #endif using AllTypesWithoutIshmem = - ::testing::Types>; //, - //dr::mp::dual_distributed_vector>; + ::testing::Types, + dr::mp::dual_distributed_vector>; namespace dr::mp { From ffc698da9f6eaf1b55a4e864d9ec1a11dc072862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 26 Feb 2025 00:00:45 +0100 Subject: [PATCH 082/101] prog --- include/dr/mp/halo.hpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 87681b1d5b..50df1a7c8e 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -431,18 +431,23 @@ class cyclic_span_halo { } } - void exchange_begin() { + void partial_exchange_begin() { halos_[next_comm_index_]->exchange_begin(); } - void exchange_finalize() { + void partial_exchange_finalize() { + halos_[next_comm_index_]->exchange_finalize(); + } + + void partial_exchange() { + halos_[next_comm_index_]->exchange_begin(); halos_[next_comm_index_]->exchange_finalize(); - //increment_index(); } void exchange() { - halos_[next_comm_index_]->exchange(); - //increment_index(); + partial_exchange(); + increment_index(); + partial_exchange(); } void reduce_begin() { From e69d371b041f9535eef387a7ce42adda40417bff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 4 Mar 2025 20:47:59 +0100 Subject: [PATCH 083/101] prog --- include/dr/detail/communicator.hpp | 2 +- 
.../dr/mp/containers/distributed_vector.hpp | 17 +- .../mp/containers/dual_distributed_vector.hpp | 3 +- include/dr/mp/containers/dual_segment.hpp | 274 ++++++++++++++++-- include/dr/mp/containers/segment.hpp | 12 +- include/dr/mp/halo.hpp | 14 +- test/gtest/mp/halo.cpp | 2 +- 7 files changed, 265 insertions(+), 59 deletions(-) diff --git a/include/dr/detail/communicator.hpp b/include/dr/detail/communicator.hpp index 7d41de05a5..331253ab63 100644 --- a/include/dr/detail/communicator.hpp +++ b/include/dr/detail/communicator.hpp @@ -232,7 +232,7 @@ class rma_window { void create(communicator comm, void *data, std::size_t size) { local_data_ = data; communicator_ = comm; - DRLOG("win create:: size: {} data:{}", size, data); + DRLOG("win create:: size: {} data:{}", size, data); MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); } diff --git a/include/dr/mp/containers/distributed_vector.hpp b/include/dr/mp/containers/distributed_vector.hpp index 22075ec2a5..2611963064 100644 --- a/include/dr/mp/containers/distributed_vector.hpp +++ b/include/dr/mp/containers/distributed_vector.hpp @@ -16,6 +16,7 @@ class MpiBackend { public: void *allocate(std::size_t data_size) { + assert(data_size > 0); void *data = __detail::allocator().allocate(data_size); DRLOG("called MPI allocate({}) -> got:{}", data_size, data); win_.create(default_comm(), data, data_size); @@ -252,9 +253,8 @@ template class distributed_vector { ~distributed_vector() { if (!finalized()) { fence(); - if (data_ != nullptr) { - backend_.deallocate(data_, data_size_ * sizeof(value_type)); + backend.deallocate(data_, data_size_ * sizeof(value_type)); } delete halo_; @@ -274,14 +274,7 @@ template class distributed_vector { auto segments() const { return rng::views::all(segments_); } - void fence() { backend_.fence(); } - - backend_type& backend(const std::size_t segment_index) { return backend_; } - const backend_type& backend(const std::size_t segment_index) const { - return backend_; - } - - T 
*data(const std::size_t segment_index) { return data_; } + void fence() { backend.fence(); } private: void init(auto size, auto dist) { @@ -302,7 +295,7 @@ template class distributed_vector { data_size_ = segment_size_ + hb.prev + hb.next; if (size_ > 0) { - data_ = static_cast(backend_.allocate(data_size_ * sizeof(T))); + data_ = static_cast(backend.allocate(data_size_ * sizeof(T))); } halo_ = new span_halo(default_comm(), data_, data_size_, hb); @@ -326,7 +319,7 @@ template class distributed_vector { distribution distribution_; std::size_t size_; std::vector> segments_; - BackendT backend_; + BackendT backend; }; template diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 49116608dc..b9d104176d 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -316,8 +316,7 @@ class dual_distributed_vector { } } - friend dv_segment_iterator; - // friend dual_dv_segment_iterator; + friend dual_dv_segment_iterator; std::size_t segment_size_ = 0; std::size_t data_size_ = 0; // size + halo diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index 37bdfe2745..e28df9b2b9 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -2,54 +2,251 @@ // // SPDX-License-Identifier: BSD-3-Clause -#include "segment.hpp" - #pragma once namespace dr::mp { -// template -// class dual_dv_segment_iterator : public dv_segment_iterator { -// public: -// dual_dv_segment_iterator() = default; -// dual_dv_segment_iterator(DV *dv, std::size_t segment_index, std::size_t index) -// : dv_segment_iterator(dv, segment_index, index) { -// } +template class dual_dv_segment_iterator; -// auto rank() const { -// std::cout << "rank(): segment_index_ == " << this->segment_index_ << "\n"; +template class dual_dv_segment_reference { + using iterator = 
dual_dv_segment_iterator; -// if (this->segment_index_ < default_comm().size()) { -// return this->segment_index_; -// } +public: + using value_type = typename DV::value_type; -// return 2 * default_comm().size() - this->segment_index_ - 1; -// } -// }; + dual_dv_segment_reference(const iterator it) : iterator_(it) {} + + operator value_type() const { return iterator_.get(); } + auto operator=(const value_type &value) const { + iterator_.put(value); + return *this; + } + auto operator=(const dual_dv_segment_reference &other) const { + *this = value_type(other); + return *this; + } + auto operator&() const { return iterator_; } -template -class dual_dv_segment : public dv_segment { private: - using iterator = dv_segment_iterator; + const iterator iterator_; +}; // dual_dv_segment_reference + +template class dual_dv_segment_iterator { +public: + using value_type = typename DV::value_type; + using size_type = typename DV::size_type; + using difference_type = typename DV::difference_type; + + dual_dv_segment_iterator() = default; + dual_dv_segment_iterator(DV *dv, std::size_t segment_index, std::size_t index) { + dv_ = dv; + segment_index_ = segment_index; + index_ = index; + } + + auto operator<=>(const dual_dv_segment_iterator &other) const noexcept { + // assertion below checks against compare dereferenceable iterator to a + // singular iterator and against attempt to compare iterators from different + // sequences like _Safe_iterator does + assert(dv_ == other.dv_); + return segment_index_ == other.segment_index_ + ? 
index_ <=> other.index_ + : segment_index_ <=> other.segment_index_; + } + + // Comparison + bool operator==(const dual_dv_segment_iterator &other) const noexcept { + return (*this <=> other) == 0; + } + + // Only this arithmetic manipulate internal state + auto &operator+=(difference_type n) { + assert(dv_ != nullptr); + assert(n >= 0 || static_cast(index_) >= -n); + index_ += n; + return *this; + } + + auto &operator-=(difference_type n) { return *this += (-n); } + + difference_type operator-(const dual_dv_segment_iterator &other) const noexcept { + assert(dv_ != nullptr && dv_ == other.dv_); + assert(index_ >= other.index_); + return index_ - other.index_; + } + + // prefix + auto &operator++() { + *this += 1; + return *this; + } + auto &operator--() { + *this -= 1; + return *this; + } + + // postfix + auto operator++(int) { + auto prev = *this; + *this += 1; + return prev; + } + auto operator--(int) { + auto prev = *this; + *this -= 1; + return prev; + } + + auto operator+(difference_type n) const { + auto p = *this; + p += n; + return p; + } + auto operator-(difference_type n) const { + auto p = *this; + p -= n; + return p; + } + + // When *this is not first in the expression + friend auto operator+(difference_type n, const dual_dv_segment_iterator &other) { + return other + n; + } + + // dereference + auto operator*() const { + assert(dv_ != nullptr); + return dual_dv_segment_reference{*this}; + } + auto operator[](difference_type n) const { + assert(dv_ != nullptr); + return *(*this + n); + } + + void get(value_type *dst, std::size_t size) const { + assert(dv_ != nullptr); + assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); + auto segment_offset = index_ + dv_->distribution_.halo().prev; + dv_->backend(segment_index_).getmem(dst, segment_offset * sizeof(value_type), + size * sizeof(value_type), segment_index_); + } + + value_type get() const { + value_type val; + get(&val, 1); + return val; + } + + void put(const value_type *dst, 
std::size_t size) const { + std::cout << "put with (size=" << size << " segment_index_=" << segment_index_ << " index_=" << index_ << ")\n"; + assert(dv_ != nullptr); + assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); + auto segment_offset = index_ + dv_->distribution_.halo().prev; + dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, + size); + dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), + size * sizeof(value_type), rank()); + } + + void put(const value_type &value) const { put(&value, 1); } + + auto rank() const { + std::cout << "rank()\n\tsegment_index_ == " << this->segment_index_ << "\n"; + + // return this->segment_index_; + + if (this->segment_index_ < default_comm().size()) { + std::cout << "\treturning: " << this->segment_index_ << "\n"; + return this->segment_index_; + } + + std::cout << "\treturning: " << 2 * default_comm().size() - this->segment_index_ - 1 << "\n"; + return 2 * default_comm().size() - this->segment_index_ - 1; + } + + auto local() const { +#ifndef SYCL_LANGUAGE_VERSION + assert(dv_ != nullptr); +#endif + const auto my_process_segment_index = dv_->backend(segment_index_).getrank(); + + if (my_process_segment_index == segment_index_) + return dv_->data(segment_index_) + index_ + dv_->distribution_.halo().prev; +#ifndef SYCL_LANGUAGE_VERSION + assert(!dv_->distribution_.halo().periodic); // not implemented +#endif + // sliding view needs local iterators that point to the halo + if (my_process_segment_index + 1 == segment_index_) { +#ifndef SYCL_LANGUAGE_VERSION + assert(index_ <= dv_->distribution_.halo() + .next); // <= instead of < to cover end() case +#endif + return dv_->data(segment_index_) + dv_->distribution_.halo().prev + + index_ + dv_->segment_size_; + } + + if (my_process_segment_index == segment_index_ + 1) { +#ifndef SYCL_LANGUAGE_VERSION + assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); +#endif + return dv_->data(segment_index_) + 
dv_->distribution_.halo().prev + + index_ - dv_->segment_size_; + } + +#ifndef SYCL_LANGUAGE_VERSION + assert(false); // trying to read non-owned memory +#endif + return static_castdata(segment_index_))>(nullptr); + } + + auto segments() const { + assert(dv_ != nullptr); + return dr::__detail::drop_segments(dv_->segments(), segment_index_, index_); + } + + auto &halo() const { + assert(dv_ != nullptr); + return dv_->halo(); + } + auto halo_bounds() const { + assert(dv_ != nullptr); + return dv_->distribution_.halo(); + } + +private: + // all fields need to be initialized by default ctor so every default + // constructed iter is equal to any other default constructed iter + DV *dv_ = nullptr; + std::size_t segment_index_ = 0; + std::size_t index_ = 0; +}; // dual_dv_segment_iterator + +template class dual_dv_segment { +private: + using iterator = dual_dv_segment_iterator; public: using difference_type = std::ptrdiff_t; dual_dv_segment() = default; dual_dv_segment(DV *dv, std::size_t segment_index, std::size_t size, - std::size_t reserved) - : dv_segment(dv, segment_index, size, reserved) { + std::size_t reserved) { + dv_ = dv; + segment_index_ = segment_index; + size_ = size; + reserved_ = reserved; + assert(dv_ != nullptr); } - // auto size() const { - // assert(this->dv_ != nullptr); - // return this->size_; - // } + auto size() const { + assert(dv_ != nullptr); + return size_; + } - // auto begin() const { return iterator(this->dv_, this->segment_index_, 0); } - // auto end() const { return begin() + size(); } - // auto reserved() const { return this->reserved_; } + auto begin() const { return iterator(dv_, segment_index_, 0); } + auto end() const { return begin() + size(); } + auto reserved() const { return reserved_; } - // auto operator[](difference_type n) const { return *(begin() + n); } + auto operator[](difference_type n) const { return *(begin() + n); } bool is_local() const { return this->segment_index_ == default_comm().rank() @@ -62,6 +259,23 @@ class 
dual_dv_segment : public dv_segment { private: bool _is_compute = true; + DV *dv_ = nullptr; + std::size_t segment_index_; + std::size_t size_; + std::size_t reserved_; }; // dual_dv_segment +// +// Many views preserve the distributed_vector segments iterator, which +// can supply halo +// +// template +// concept has_halo_method = dr::distributed_range && requires(DR &&dr) { +// { rng::begin(dr::ranges::segments(dr)[0]).halo() }; +// }; + +// auto &halo(has_halo_method auto &&dr) { +// return rng::begin(dr::ranges::segments(dr)[0]).halo(); +// } + } // namespace dr::mp diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index a22ca661f2..b388292110 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -138,7 +138,6 @@ template class dv_segment_iterator { } void put(const value_type *dst, std::size_t size) const { - std::cout << "put with (size=" << size << " segment_index_=" << segment_index_ << " index_=" << index_ << ")\n"; assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; @@ -152,14 +151,7 @@ template class dv_segment_iterator { auto rank() const { assert(dv_ != nullptr); - - std::cout << "rank(): segment_index_ == " << this->segment_index_ << "\n"; - - if (this->segment_index_ < default_comm().size()) { - return this->segment_index_; - } - - return 2 * default_comm().size() - this->segment_index_ - 1; + return this->segment_index_; } auto local() const { @@ -248,7 +240,7 @@ template class dv_segment { bool is_local() const { return segment_index_ == default_comm().rank(); } -protected: +private: DV *dv_ = nullptr; std::size_t segment_index_; std::size_t size_; diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 50df1a7c8e..305cf6a998 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -445,9 +445,17 @@ class cyclic_span_halo { } void exchange() { - 
partial_exchange(); - increment_index(); - partial_exchange(); + // partial_exchange(); + // increment_index(); + // partial_exchange(); + } + + void exchange_begin() { + // todo + } + + void exchange_finalize() { + // todo } void reduce_begin() { diff --git a/test/gtest/mp/halo.cpp b/test/gtest/mp/halo.cpp index 6d4394ed96..5e99be74b9 100644 --- a/test/gtest/mp/halo.cpp +++ b/test/gtest/mp/halo.cpp @@ -18,7 +18,7 @@ void local_is_accessible_in_halo_region(const int halo_prev, iota(dv, 0); DRLOG("exchange start"); - dv.halo().exchange(); + // dv.halo().exchange(); // arrays below is function depending on size of communicator-1 std::array first_local_index___; From 92baecf69e9fd7831eb4a6dcd06a309c170f72d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 4 Mar 2025 20:50:01 +0100 Subject: [PATCH 084/101] prog --- include/dr/mp/containers/segment.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index b388292110..2b4eaa256d 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -127,7 +127,7 @@ template class dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - dv_->backend(segment_index_).getmem(dst, segment_offset * sizeof(value_type), + dv_->backend.getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -143,7 +143,7 @@ template class dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), + dv_->backend.putmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), rank()); } @@ -151,17 +151,17 @@ template class 
dv_segment_iterator { auto rank() const { assert(dv_ != nullptr); - return this->segment_index_; + return segment_index_; } auto local() const { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = dv_->backend(segment_index_).getrank(); + const auto my_process_segment_index = dv_->backend.getrank(); if (my_process_segment_index == segment_index_) - return dv_->data(segment_index_) + index_ + dv_->distribution_.halo().prev; + return dv_->data + index_ + dv_->distribution_.halo().prev; #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif @@ -171,7 +171,7 @@ template class dv_segment_iterator { assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case #endif - return dv_->data(segment_index_) + dv_->distribution_.halo().prev + return dv_->data + dv_->distribution_.halo().prev + index_ + dv_->segment_size_; } @@ -179,14 +179,14 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif - return dv_->data(segment_index_) + dv_->distribution_.halo().prev - + index_ - dv_->segment_size_; + return dv_->data + dv_->distribution_.halo().prev + index_ - + dv_->segment_size_; } #ifndef SYCL_LANGUAGE_VERSION assert(false); // trying to read non-owned memory #endif - return static_castdata(segment_index_))>(nullptr); + return static_castdata)>(nullptr); } auto segments() const { From 232b66f5d2d42015a9d81cfe4f794e6b0eccf082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 4 Mar 2025 20:51:22 +0100 Subject: [PATCH 085/101] prog --- include/dr/mp/containers/segment.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 2b4eaa256d..78d650b9ff 100644 --- a/include/dr/mp/containers/segment.hpp +++ 
b/include/dr/mp/containers/segment.hpp @@ -144,7 +144,7 @@ template class dv_segment_iterator { dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); dv_->backend.putmem(dst, segment_offset * sizeof(value_type), - size * sizeof(value_type), rank()); + size * sizeof(value_type), segment_index_); } void put(const value_type &value) const { put(&value, 1); } @@ -161,7 +161,7 @@ template class dv_segment_iterator { const auto my_process_segment_index = dv_->backend.getrank(); if (my_process_segment_index == segment_index_) - return dv_->data + index_ + dv_->distribution_.halo().prev; + return dv_->data_ + index_ + dv_->distribution_.halo().prev; #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif @@ -171,22 +171,22 @@ template class dv_segment_iterator { assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case #endif - return dv_->data + dv_->distribution_.halo().prev - + index_ + dv_->segment_size_; + return dv_->data_ + dv_->distribution_.halo().prev + index_ + + dv_->segment_size_; } if (my_process_segment_index == segment_index_ + 1) { #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif - return dv_->data + dv_->distribution_.halo().prev + index_ - + return dv_->data_ + dv_->distribution_.halo().prev + index_ - dv_->segment_size_; } #ifndef SYCL_LANGUAGE_VERSION assert(false); // trying to read non-owned memory #endif - return static_castdata)>(nullptr); + return static_castdata_)>(nullptr); } auto segments() const { From 7f6adc640f90e3f2fb4a9b5b5a2b6717958aace9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 4 Mar 2025 20:51:38 +0100 Subject: [PATCH 086/101] prog --- include/dr/mp/containers/segment.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 
78d650b9ff..56724ac61b 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -171,7 +171,7 @@ template class dv_segment_iterator { assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case #endif - return dv_->data_ + dv_->distribution_.halo().prev + index_ + + return dv_->data_ + dv_->distribution_.halo().prev + index_ + dv_->segment_size_; } @@ -179,7 +179,7 @@ template class dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif - return dv_->data_ + dv_->distribution_.halo().prev + index_ - + return dv_->data_ + dv_->distribution_.halo().prev + index_ - dv_->segment_size_; } From f1dd4c10689dbb1cf507b9902d88eed207f53dae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Tue, 4 Mar 2025 20:57:19 +0100 Subject: [PATCH 087/101] prog --- .../mp/containers/dual_distributed_vector.hpp | 10 +++- include/dr/mp/containers/dual_segment.hpp | 56 ++++++------------- 2 files changed, 24 insertions(+), 42 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index b9d104176d..4d251b8775 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -245,15 +245,19 @@ class dual_distributed_vector { __attribute__((unused)) void fence(const std::size_t i) { backends_[i].fence(); } + auto res_idx(const std::size_t segment_index) const { + return segment_index < default_comm().size() ? 0 : 1; + } + backend_type& backend(const std::size_t segment_index) { - return backends_[segment_index < default_comm().size() ? 0 : 1]; + return backends_[res_idx(segment_index)]; } const backend_type& backend(const std::size_t segment_index) const { - return backends_[segment_index < default_comm().size() ? 
0 : 1]; + return backends_[res_idx(segment_index)]; } T *data(const std::size_t segment_index) { - return datas_[segment_index < default_comm().size() ? 0 : 1]; + return datas_[res_idx(segment_index)]; } private: diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index e28df9b2b9..beb793133d 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -109,14 +109,14 @@ template class dual_dv_segment_iterator { } // When *this is not first in the expression - friend auto operator+(difference_type n, const dual_dv_segment_iterator &other) { + friend auto operator+(difference_type n, const dv_segment_iterator &other) { return other + n; } // dereference auto operator*() const { assert(dv_ != nullptr); - return dual_dv_segment_reference{*this}; + return dv_segment_reference{*this}; } auto operator[](difference_type n) const { assert(dv_ != nullptr); @@ -127,7 +127,7 @@ template class dual_dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - dv_->backend(segment_index_).getmem(dst, segment_offset * sizeof(value_type), + dv_->backend.getmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -138,40 +138,30 @@ template class dual_dv_segment_iterator { } void put(const value_type *dst, std::size_t size) const { - std::cout << "put with (size=" << size << " segment_index_=" << segment_index_ << " index_=" << index_ << ")\n"; assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), - size * sizeof(value_type), rank()); + dv_->backend.putmem(dst, segment_offset * 
sizeof(value_type), + size * sizeof(value_type), segment_index_); } void put(const value_type &value) const { put(&value, 1); } auto rank() const { - std::cout << "rank()\n\tsegment_index_ == " << this->segment_index_ << "\n"; - - // return this->segment_index_; - - if (this->segment_index_ < default_comm().size()) { - std::cout << "\treturning: " << this->segment_index_ << "\n"; - return this->segment_index_; - } - - std::cout << "\treturning: " << 2 * default_comm().size() - this->segment_index_ - 1 << "\n"; - return 2 * default_comm().size() - this->segment_index_ - 1; + assert(dv_ != nullptr); + return segment_index_; } auto local() const { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = dv_->backend(segment_index_).getrank(); + const auto my_process_segment_index = dv_->backend.getrank(); if (my_process_segment_index == segment_index_) - return dv_->data(segment_index_) + index_ + dv_->distribution_.halo().prev; + return dv_->data_ + index_ + dv_->distribution_.halo().prev; #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif @@ -181,22 +171,22 @@ template class dual_dv_segment_iterator { assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case #endif - return dv_->data(segment_index_) + dv_->distribution_.halo().prev - + index_ + dv_->segment_size_; + return dv_->data_ + dv_->distribution_.halo().prev + index_ + + dv_->segment_size_; } if (my_process_segment_index == segment_index_ + 1) { #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif - return dv_->data(segment_index_) + dv_->distribution_.halo().prev - + index_ - dv_->segment_size_; + return dv_->data_ + dv_->distribution_.halo().prev + index_ - + dv_->segment_size_; } #ifndef SYCL_LANGUAGE_VERSION assert(false); // trying to read non-owned memory #endif - return static_castdata(segment_index_))>(nullptr); + return 
static_castdata_)>(nullptr); } auto segments() const { @@ -249,8 +239,9 @@ template class dual_dv_segment { auto operator[](difference_type n) const { return *(begin() + n); } bool is_local() const { - return this->segment_index_ == default_comm().rank() - || this->segment_index_ == 2 * default_comm().size() - default_comm().rank() - 1; + auto rank = default_comm().rank(); + return this->segment_index_ == rank + || this->segment_index_ == 2 * default_comm().size() - rank - 1; } bool is_compute() const { return _is_compute; } @@ -265,17 +256,4 @@ template class dual_dv_segment { std::size_t reserved_; }; // dual_dv_segment -// -// Many views preserve the distributed_vector segments iterator, which -// can supply halo -// -// template -// concept has_halo_method = dr::distributed_range && requires(DR &&dr) { -// { rng::begin(dr::ranges::segments(dr)[0]).halo() }; -// }; - -// auto &halo(has_halo_method auto &&dr) { -// return rng::begin(dr::ranges::segments(dr)[0]).halo(); -// } - } // namespace dr::mp From 71bff7d1cc3a5ec43545be157668e302d867e4e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 10 Mar 2025 22:08:24 +0100 Subject: [PATCH 088/101] prog --- include/dr/mp/containers/dual_segment.hpp | 45 +++++++++++++---------- include/dr/mp/halo.hpp | 17 ++++++--- test/gtest/mp/halo.cpp | 6 +-- 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index beb793133d..582ecd216d 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -109,14 +109,14 @@ template class dual_dv_segment_iterator { } // When *this is not first in the expression - friend auto operator+(difference_type n, const dv_segment_iterator &other) { + friend auto operator+(difference_type n, const dual_dv_segment_iterator &other) { return other + n; } // dereference auto operator*() const { assert(dv_ != nullptr); - return 
dv_segment_reference{*this}; + return dual_dv_segment_reference{*this}; } auto operator[](difference_type n) const { assert(dv_ != nullptr); @@ -127,8 +127,8 @@ template class dual_dv_segment_iterator { assert(dv_ != nullptr); assert(segment_index_ * dv_->segment_size_ + index_ < dv_->size()); auto segment_offset = index_ + dv_->distribution_.halo().prev; - dv_->backend.getmem(dst, segment_offset * sizeof(value_type), - size * sizeof(value_type), segment_index_); + dv_->backend(segment_index_).getmem(dst, segment_offset * sizeof(value_type), + size * sizeof(value_type), segment_index_); } value_type get() const { @@ -143,7 +143,7 @@ template class dual_dv_segment_iterator { auto segment_offset = index_ + dv_->distribution_.halo().prev; dr::drlog.debug("dv put:: ({}:{}:{})\n", segment_index_, segment_offset, size); - dv_->backend.putmem(dst, segment_offset * sizeof(value_type), + dv_->backend(segment_index_).putmem(dst, segment_offset * sizeof(value_type), size * sizeof(value_type), segment_index_); } @@ -158,35 +158,40 @@ template class dual_dv_segment_iterator { #ifndef SYCL_LANGUAGE_VERSION assert(dv_ != nullptr); #endif - const auto my_process_segment_index = dv_->backend.getrank(); - - if (my_process_segment_index == segment_index_) - return dv_->data_ + index_ + dv_->distribution_.halo().prev; + const auto my_process_rank = dv_->backend(segment_index_).getrank(); + const auto normalized_segment_index = segment_index_ < default_comm().size() + ? 
segment_index_ + : segment_index_ - default_comm().size(); + const auto data = dv_->data(segment_index_); + + if (my_process_rank == normalized_segment_index) { + std::cout << "case 0\n"; + return data + index_ + dv_->distribution_.halo().prev; + } #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif // sliding view needs local iterators that point to the halo - if (my_process_segment_index + 1 == segment_index_) { + if (my_process_rank + 1 == normalized_segment_index) { + std::cout << "case 1\n"; #ifndef SYCL_LANGUAGE_VERSION - assert(index_ <= dv_->distribution_.halo() - .next); // <= instead of < to cover end() case + assert(index_ <= dv_->distribution_.halo().next); // <= instead of < to cover end() case #endif - return dv_->data_ + dv_->distribution_.halo().prev + index_ + - dv_->segment_size_; + return data + dv_->distribution_.halo().prev + index_ + dv_->segment_size_; } - if (my_process_segment_index == segment_index_ + 1) { + if (my_process_rank == normalized_segment_index + 1) { + std::cout << "case 2\n"; #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif - return dv_->data_ + dv_->distribution_.halo().prev + index_ - - dv_->segment_size_; + return data + dv_->distribution_.halo().prev + index_ - dv_->segment_size_; } #ifndef SYCL_LANGUAGE_VERSION assert(false); // trying to read non-owned memory #endif - return static_castdata_)>(nullptr); + return static_castdata(segment_index_))>(nullptr); } auto segments() const { @@ -240,8 +245,8 @@ template class dual_dv_segment { bool is_local() const { auto rank = default_comm().rank(); - return this->segment_index_ == rank - || this->segment_index_ == 2 * default_comm().size() - rank - 1; + return segment_index_ == rank + || segment_index_ == 2 * default_comm().size() - rank - 1; } bool is_compute() const { return _is_compute; } diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 
305cf6a998..6384518eca 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -445,17 +445,24 @@ class cyclic_span_halo { } void exchange() { - // partial_exchange(); - // increment_index(); - // partial_exchange(); + partial_exchange(); + increment_index(); + partial_exchange(); + increment_index(); } void exchange_begin() { - // todo + halos_[next_comm_index_]->partial_exchange_begin(); + increment_index(); + halos_[next_comm_index_]->partial_exchange_begin(); + increment_index(); } void exchange_finalize() { - // todo + halos_[next_comm_index_]->partial_exchange_finalize(); + increment_index(); + halos_[next_comm_index_]->partial_exchange_finalize(); + increment_index(); } void reduce_begin() { diff --git a/test/gtest/mp/halo.cpp b/test/gtest/mp/halo.cpp index 5e99be74b9..e9cf7a5cf0 100644 --- a/test/gtest/mp/halo.cpp +++ b/test/gtest/mp/halo.cpp @@ -18,7 +18,7 @@ void local_is_accessible_in_halo_region(const int halo_prev, iota(dv, 0); DRLOG("exchange start"); - // dv.halo().exchange(); + dv.halo().exchange(); // arrays below is function depending on size of communicator-1 std::array first_local_index___; @@ -59,8 +59,8 @@ void local_is_accessible_in_halo_region(const int halo_prev, auto first_legal_idx = std::max(0, first_local_index___[c] - halo_prev); auto first_illegal_idx = std::min(6, first_nonlocal_index[c] + halo_next); - std::cout << "first_legal_idx: " << first_legal_idx << "\n"; - std::cout << "first_illegal_idx: " << first_illegal_idx << "\n"; + // std::cout << "first_legal_idx: " << first_legal_idx << "\n"; + // std::cout << "first_illegal_idx: " << first_illegal_idx << "\n"; DRLOG("checking access to idx between first legal {} and first illegal {}, " "c:{}", From 8b6dc9d7f8268287bee393201ef41062b97cb5e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 10 Mar 2025 23:54:04 +0100 Subject: [PATCH 089/101] prog --- include/dr/mp/algorithms/for_each.hpp | 8 +- .../mp/containers/dual_distributed_vector.hpp | 
7 +- include/dr/mp/containers/dual_segment.hpp | 39 +++-- include/dr/mp/containers/segment.hpp | 6 +- include/dr/mp/halo.hpp | 11 +- test/gtest/mp/CMakeLists.txt | 5 +- test/gtest/mp/halo-dual.cpp | 151 ++++++++++++++++++ test/gtest/mp/halo.cpp | 3 - 8 files changed, 201 insertions(+), 29 deletions(-) create mode 100644 test/gtest/mp/halo-dual.cpp diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 4e7ae04332..070b934f80 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -76,10 +76,10 @@ void for_each(dr::distributed_range auto &&dr, auto op) { assert(rng::distance(s) > 0); #ifdef SYCL_LANGUAGE_VERSION - dr::__detail::parallel_for( - dr::mp::sycl_queue(), sycl::range<1>(rng::distance(s)), - [first = rng::begin(s), op](auto idx) { op(first[idx]); }) - .wait(); + // dr::__detail::parallel_for( + // dr::mp::sycl_queue(), sycl::range<1>(rng::distance(s)), + // [first = rng::begin(s), op](auto idx) { op(first[idx]); }) + // .wait(); #else assert(false); #endif diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 4d251b8775..5acef6a30d 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -296,11 +296,10 @@ class dual_distributed_vector { for (std::size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { if (size_ > 0) { - datas_.push_back(static_cast( - backends_[i].allocate(data_size_ * sizeof(value_type)))); + datas_.push_back(static_cast(backends_[i].allocate(data_size_ * sizeof(value_type)))); + std::memset(datas_[i], 69, data_size_ * sizeof(value_type)); // todo: debug remove later + halos_.push_back(new span_halo(default_comm(), datas_[i], data_size_, hb)); } - - halos_.push_back(new span_halo(default_comm(), datas_[i], data_size_, hb)); } halo_ = new cyclic_span_halo(halos_); diff --git a/include/dr/mp/containers/dual_segment.hpp 
b/include/dr/mp/containers/dual_segment.hpp index 582ecd216d..357d688c0f 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -151,7 +151,12 @@ template class dual_dv_segment_iterator { auto rank() const { assert(dv_ != nullptr); - return segment_index_; + + if (segment_index_ < default_comm().size()) { + return segment_index_; + } + + return 2 * default_comm().size() - segment_index_ - 1; } auto local() const { @@ -159,29 +164,43 @@ template class dual_dv_segment_iterator { assert(dv_ != nullptr); #endif const auto my_process_rank = dv_->backend(segment_index_).getrank(); - const auto normalized_segment_index = segment_index_ < default_comm().size() + const bool is_left_segment = segment_index_ < default_comm().size(); + const auto normalized_segment_index = is_left_segment ? segment_index_ - : segment_index_ - default_comm().size(); + : 2 * default_comm().size() - segment_index_ - 1; const auto data = dv_->data(segment_index_); - - if (my_process_rank == normalized_segment_index) { - std::cout << "case 0\n"; + + const bool is_in_bounds = my_process_rank == normalized_segment_index; + const bool is_in_halo_prev = is_left_segment + ? my_process_rank + 1 == normalized_segment_index + : my_process_rank == normalized_segment_index + 1; + const bool is_in_halo_next = is_left_segment + ? 
my_process_rank == normalized_segment_index + 1 + : my_process_rank + 1 == normalized_segment_index; + + std::cout << "my_process_rank, normalized_segment_index = " << my_process_rank << " " << normalized_segment_index << "\n"; + + if (is_in_bounds) { + std::cout << "case is_in_bounds\n"; + assert(!is_in_halo_prev && !is_in_halo_next); return data + index_ + dv_->distribution_.halo().prev; } #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif // sliding view needs local iterators that point to the halo - if (my_process_rank + 1 == normalized_segment_index) { - std::cout << "case 1\n"; + if (is_in_halo_prev) { + std::cout << "case is_in_halo_prev\n"; + assert(!is_in_bounds && !is_in_halo_next); #ifndef SYCL_LANGUAGE_VERSION assert(index_ <= dv_->distribution_.halo().next); // <= instead of < to cover end() case #endif return data + dv_->distribution_.halo().prev + index_ + dv_->segment_size_; } - if (my_process_rank == normalized_segment_index + 1) { - std::cout << "case 2\n"; + if (is_in_halo_next) { + std::cout << "case is_in_halo_next\n"; + assert(!is_in_bounds && !is_in_halo_prev); #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index 56724ac61b..c967331426 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -160,13 +160,16 @@ template class dv_segment_iterator { #endif const auto my_process_segment_index = dv_->backend.getrank(); - if (my_process_segment_index == segment_index_) + if (my_process_segment_index == segment_index_) { + std::cout << "case 0\n"; return dv_->data_ + index_ + dv_->distribution_.halo().prev; + } #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif // sliding view needs local iterators that point to the halo if (my_process_segment_index + 1 == 
segment_index_) { + std::cout << "case 1\n"; #ifndef SYCL_LANGUAGE_VERSION assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case @@ -176,6 +179,7 @@ template class dv_segment_iterator { } if (my_process_segment_index == segment_index_ + 1) { + std::cout << "case 2\n"; #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 6384518eca..ede1e18ffc 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -440,8 +440,7 @@ class cyclic_span_halo { } void partial_exchange() { - halos_[next_comm_index_]->exchange_begin(); - halos_[next_comm_index_]->exchange_finalize(); + halos_[next_comm_index_]->exchange(); } void exchange() { @@ -452,16 +451,16 @@ class cyclic_span_halo { } void exchange_begin() { - halos_[next_comm_index_]->partial_exchange_begin(); + partial_exchange_begin(); increment_index(); - halos_[next_comm_index_]->partial_exchange_begin(); + partial_exchange_begin(); increment_index(); } void exchange_finalize() { - halos_[next_comm_index_]->partial_exchange_finalize(); + partial_exchange_finalize(); increment_index(); - halos_[next_comm_index_]->partial_exchange_finalize(); + partial_exchange_finalize(); increment_index(); } diff --git a/test/gtest/mp/CMakeLists.txt b/test/gtest/mp/CMakeLists.txt index e1f3ba70f4..410afe5017 100644 --- a/test/gtest/mp/CMakeLists.txt +++ b/test/gtest/mp/CMakeLists.txt @@ -38,6 +38,7 @@ add_executable( distributed_vector.cpp dual_distributed_vector.cpp halo.cpp + halo-dual.cpp mdstar.cpp mpsort.cpp reduce.cpp @@ -59,7 +60,7 @@ add_executable( add_executable(mp-quick-test mp-tests.cpp - halo.cpp + halo-dual.cpp # ../common/for_each.cpp # dual_distributed_vector.cpp ) @@ -81,6 +82,8 @@ endforeach() if(NOT ENABLE_ISHMEM) add_mp_ctest(NAME mp-quick-test NPROC 1) add_mp_ctest(NAME mp-quick-test NPROC 2) + add_mp_ctest(NAME mp-quick-test NPROC 3) + add_mp_ctest(NAME 
mp-quick-test NPROC 4) cmake_path(GET MPI_CXX_ADDITIONAL_INCLUDE_DIRS FILENAME MPI_IMPL) diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp new file mode 100644 index 0000000000..927e532999 --- /dev/null +++ b/test/gtest/mp/halo-dual.cpp @@ -0,0 +1,151 @@ +// SPDX-FileCopyrightText: Intel Corporation +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "xp-tests.hpp" + +template class HaloDual : public testing::Test {}; + +TYPED_TEST_SUITE(HaloDual, ::testing::Types>); + +template +void local_is_accessible_in_halo_region(const int halo_prev, + const int halo_next) { + + DV dv(12, dr::mp::distribution().halo(halo_prev, halo_next)); + DRLOG("local_is_accessible_in_halo_region TEST START, prev:{}, next:{}", + halo_prev, halo_next); + iota(dv, 0); + DRLOG("exchange start"); + + std::cout << "[started exchange]\n"; + dv.halo().exchange(); + std::cout << "[finished exchange]\n"; + + // 0 1 1 0 + // 012 345 678 9ab + + // 0 1 2 2 1 0 + // 01 23 45 67 89 ab + + // 0 1 2 3 3 2 + // 01 23 45 67 89 ab + + // arrays below is function depending on size of communicator-1 + std::array first_local_index____; + std::array first_nonlocal_index_; + std::array second_local_index___; + std::array second_nonlocal_index; + const int X = 10000; // to mark unused value + + switch (dr::mp::default_comm().rank()) { + case 0: + first_local_index____ = {+0, +0, +0, 0, 0, 0}; + first_nonlocal_index_ = {12, +3, +2, 2, 1, 1}; + second_local_index___ = {+X, +9, 10, X, 0, 0}; + second_nonlocal_index = {+X, 12, 12, X, 1, 1}; + break; + case 1: + first_local_index____ = {X, 3, +2, 2, 2, 1}; + first_nonlocal_index_ = {X, 9, +4, 4, 4, 2}; + second_local_index___ = {X, X, +8, X, 0, 0}; + second_nonlocal_index = {X, X, 10, X, 1, 1}; + break; + case 2: + first_local_index____ = {X, X, 4, +4, 4, 2}; + first_nonlocal_index_ = {X, X, 8, +6, 6, 3}; + second_local_index___ = {X, X, X, 10, 0, 0}; + second_nonlocal_index = {X, X, X, 12, 1, 1}; + break; + case 3: + first_local_index____ = 
{X, X, X, +6, 6, 3}; + first_nonlocal_index_ = {X, X, X, 10, 6, 4}; + second_local_index___ = {X, X, X, X, 0, 0}; + second_nonlocal_index = {X, X, X, X, 1, 1}; + break; + case 4: + first_local_index____ = {X, X, X, X, 6, 4}; + first_nonlocal_index_ = {X, X, X, X, 6, 5}; + break; + case 5: + first_local_index____ = {X, X, X, X, X, 5}; + first_nonlocal_index_ = {X, X, X, X, X, 6}; + break; + default: + first_local_index____ = {X, X, X, X, X, X}; + first_nonlocal_index_ = {X, X, X, X, X, X}; + } + + const auto c = dr::mp::default_comm().size() - 1; + auto first_legal_idx = std::max(0, first_local_index____[c] - halo_prev); + auto first_illegal_idx = std::min(12, first_nonlocal_index_[c] + halo_next); + auto second_legal_idx = std::max(0, second_local_index___[c] - halo_prev); + auto second_illegal_idx = std::min(12, second_nonlocal_index[c] + halo_next); + + std::cout << "first_legal_idx, first_illegal_idx: " << first_legal_idx << " " << first_illegal_idx << "\n"; + std::cout << "second_legal_idx, second_illegal_idx: " << second_legal_idx << " " << second_illegal_idx << "\n"; + + DRLOG("checking access to idx between first legal {} and first illegal {}, " + "c:{}", + first_legal_idx, first_illegal_idx, c); + + for (int idx = first_legal_idx; idx < first_illegal_idx; ++idx) { + std::cout << "[idx = " << idx << "]\n"; + typename DV::value_type *local_ptr = (dv.begin() + idx).local(); + EXPECT_TRUE(local_ptr != nullptr); + typename DV::value_type value_on_host; + + if (dr::mp::use_sycl()) + dr::mp::__detail::sycl_copy(local_ptr, &value_on_host); + else + value_on_host = *local_ptr; + + DRLOG("checking idx:{}", idx); + EXPECT_EQ(value_on_host, idx); + } + + DRLOG("checking access to idx between second legal {} and second illegal {}, " + "c:{}", + second_legal_idx, second_illegal_idx, c); + + for (int idx = second_legal_idx; idx < second_illegal_idx; ++idx) { + std::cout << "[idx = " << idx << "]\n"; + typename DV::value_type *local_ptr = (dv.begin() + idx).local(); + 
EXPECT_TRUE(local_ptr != nullptr); + typename DV::value_type value_on_host; + + if (dr::mp::use_sycl()) + dr::mp::__detail::sycl_copy(local_ptr, &value_on_host); + else + value_on_host = *local_ptr; + + DRLOG("checking idx:{}", idx); + EXPECT_EQ(value_on_host, idx); + } + + DRLOG("checks ok"); + + // although assertions indeed happen, but they are not caught by EXPECT_DEATH + // if (first_illegal_idx < 6) { + // dr::drlog.debug("checking first illegal idx:{} after legal ones\n", + // first_illegal_idx); + // EXPECT_DEATH((dv.begin() + first_illegal_idx).local(), "Assertion.*"); + // } + // if (first_legal_idx > 0) { + // dr::drlog.debug("checking last illegal idx:{} before legal ones\n", + // first_legal_idx - 1); + // EXPECT_DEATH((dv.begin() + first_legal_idx - 1).local(), "Assertion.*"); + // } +} + +TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_11) { + local_is_accessible_in_halo_region(1, 1); +} + +TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_10) { + local_is_accessible_in_halo_region(1, 0); +} + +TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_01) { + local_is_accessible_in_halo_region(0, 1); +} diff --git a/test/gtest/mp/halo.cpp b/test/gtest/mp/halo.cpp index e9cf7a5cf0..12f9268133 100644 --- a/test/gtest/mp/halo.cpp +++ b/test/gtest/mp/halo.cpp @@ -59,9 +59,6 @@ void local_is_accessible_in_halo_region(const int halo_prev, auto first_legal_idx = std::max(0, first_local_index___[c] - halo_prev); auto first_illegal_idx = std::min(6, first_nonlocal_index[c] + halo_next); - // std::cout << "first_legal_idx: " << first_legal_idx << "\n"; - // std::cout << "first_illegal_idx: " << first_illegal_idx << "\n"; - DRLOG("checking access to idx between first legal {} and first illegal {}, " "c:{}", first_legal_idx, first_illegal_idx, c); From 73ffe2d2b523d5ea763832c9b2bccfe38f389882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 12 Mar 2025 03:23:27 +0100 Subject: [PATCH 090/101] prog --- 
.../mp/containers/dual_distributed_vector.hpp | 2 ++ include/dr/mp/halo.hpp | 24 +++++++++---------- test/gtest/mp/halo-dual.cpp | 21 ++++++++++++++-- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 5acef6a30d..123527a2d6 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -260,6 +260,8 @@ class dual_distributed_vector { return datas_[res_idx(segment_index)]; } + std::size_t data_size() const { return data_size_; } + private: void init(auto size, auto dist) { size_ = size; diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index ede1e18ffc..bd809f1574 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -433,35 +433,35 @@ class cyclic_span_halo { void partial_exchange_begin() { halos_[next_comm_index_]->exchange_begin(); + increment_index(); } void partial_exchange_finalize() { halos_[next_comm_index_]->exchange_finalize(); + increment_index(); } void partial_exchange() { halos_[next_comm_index_]->exchange(); + increment_index(); } void exchange() { - partial_exchange(); - increment_index(); - partial_exchange(); - increment_index(); + for (const auto &halo: halos_) { + halo->exchange(); + } } void exchange_begin() { - partial_exchange_begin(); - increment_index(); - partial_exchange_begin(); - increment_index(); + for (const auto &halo: halos_) { + halo->exchange_begin(); + } } void exchange_finalize() { - partial_exchange_finalize(); - increment_index(); - partial_exchange_finalize(); - increment_index(); + for (const auto &halo: halos_) { + halo->exchange_finalize(); + } } void reduce_begin() { diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index 927e532999..c47430bf55 100644 --- a/test/gtest/mp/halo-dual.cpp +++ b/test/gtest/mp/halo-dual.cpp @@ -17,11 +17,28 @@ void local_is_accessible_in_halo_region(const int 
halo_prev, halo_prev, halo_next); iota(dv, 0); DRLOG("exchange start"); + + std::vector datas = { dv.data(0), dv.data(100) }; + std::cout << "[datas before exchange]\n"; + for (const auto& data: datas) { + for (size_t i = 0; i < dv.data_size(); i++) { + std::cout << data[i] << " "; + } + std::cout << "\n"; + } std::cout << "[started exchange]\n"; dv.halo().exchange(); std::cout << "[finished exchange]\n"; + std::cout << "[datas after exchange]\n"; + for (const auto& data: datas) { + for (size_t i = 0; i < dv.data_size(); i++) { + std::cout << data[i] << " "; + } + std::cout << "\n"; + } + // 0 1 1 0 // 012 345 678 9ab @@ -60,8 +77,8 @@ void local_is_accessible_in_halo_region(const int halo_prev, case 3: first_local_index____ = {X, X, X, +6, 6, 3}; first_nonlocal_index_ = {X, X, X, 10, 6, 4}; - second_local_index___ = {X, X, X, X, 0, 0}; - second_nonlocal_index = {X, X, X, X, 1, 1}; + second_local_index___ = {X, X, X, +X, 0, 0}; + second_nonlocal_index = {X, X, X, +X, 1, 1}; break; case 4: first_local_index____ = {X, X, X, X, 6, 4}; From 47909365f57788ab4d4e0d855567c56cf3a91a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 12 Mar 2025 20:09:53 +0100 Subject: [PATCH 091/101] prog --- .../mp/containers/dual_distributed_vector.hpp | 12 ++- include/dr/mp/halo.hpp | 91 +++++++++++++++++-- 2 files changed, 92 insertions(+), 11 deletions(-) diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 123527a2d6..5f838c5b67 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -199,7 +199,7 @@ class dual_distributed_vector { dual_distributed_vector(dual_distributed_vector &&) { assert(false); } /// Constructor - dual_distributed_vector(std::size_t size = 0, + dual_distributed_vector(std::size_t size = 0, distribution dist = distribution()) { init(size, dist); } @@ -290,17 +290,19 @@ class 
dual_distributed_vector { data_size_ = segment_size_ + hb.prev + hb.next; - std::cout << "creating dual_distributed vector\n" + std::cout << "[creating dual_distributed vector]\n" << "\tsize: " << size << "\n" << "\tsegment_size_: " << segment_size_ << "\n" << "\tactual_segment_count_: " << actual_segment_count_ << "\n" - << "\tdata_size_: " << data_size_ << "\n"; + << "\tdata_size_: " << data_size_ << "\n" + << "\thalo: prev=" << hb.prev << " next=" << hb.next << " periodic=" << hb.periodic << "\n"; for (std::size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { if (size_ > 0) { datas_.push_back(static_cast(backends_[i].allocate(data_size_ * sizeof(value_type)))); std::memset(datas_[i], 69, data_size_ * sizeof(value_type)); // todo: debug remove later - halos_.push_back(new span_halo(default_comm(), datas_[i], data_size_, hb)); + std::cout << "[creating new halo (" << i << ")]\n"; + halos_.push_back(new dual_span_halo(default_comm(), datas_[i], data_size_, hb, i == 1)); } } @@ -326,7 +328,7 @@ class dual_distributed_vector { std::size_t segment_size_ = 0; std::size_t data_size_ = 0; // size + halo - std::vector *> halos_; + std::vector *> halos_; std::vector datas_; cyclic_span_halo *halo_; diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index bd809f1574..37244e11e2 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -394,13 +394,13 @@ class span_halo : public span_halo_impl { std::vector owned; DRLOG("owned groups {}/{} first/last", comm.first(), comm.last()); if (hb.next > 0 && (hb.periodic || !comm.first())) { - owned.emplace_back(span.subspan(hb.prev, hb.next), comm.prev(), - halo_tag::reverse); + owned.emplace_back(span.subspan(hb.prev, hb.next), + comm.prev(), halo_tag::reverse); } if (hb.prev > 0 && (hb.periodic || !comm.last())) { owned.emplace_back( span.subspan(rng::size(span) - (hb.prev + hb.next), hb.prev), - comm.next(), halo_tag::forward); + comm.next(), halo_tag::forward); } return owned; } @@ -409,10 +409,89 @@ class 
span_halo : public span_halo_impl { halo_groups(communicator comm, std::span span, halo_bounds hb) { std::vector halo; if (hb.prev > 0 && (hb.periodic || !comm.first())) { - halo.emplace_back(span.first(hb.prev), comm.prev(), halo_tag::forward); + halo.emplace_back(span.first(hb.prev), + comm.prev(), halo_tag::forward); } if (hb.next > 0 && (hb.periodic || !comm.last())) { - halo.emplace_back(span.last(hb.next), comm.next(), halo_tag::reverse); + halo.emplace_back(span.last(hb.next), + comm.next(), halo_tag::reverse); + } + return halo; + } +}; + +template > +class dual_span_halo : public span_halo_impl { +public: + using group_type = span_group; + + dual_span_halo() : span_halo_impl(communicator(), {}, {}) {} + + dual_span_halo(communicator comm, T *data, std::size_t size, halo_bounds hb, bool rev = false) + : span_halo_impl(comm, owned_groups(comm, {data, size}, hb, rev), + halo_groups(comm, {data, size}, hb, rev)) { + check(size, hb); + } + + dual_span_halo(communicator comm, std::span span, halo_bounds hb, bool rev = false) + : span_halo_impl(comm, owned_groups(comm, span, hb, rev), + halo_groups(comm, span, hb, rev)) {} + +private: + void check(auto size, auto hb) { + assert(size >= hb.prev + hb.next + std::max(hb.prev, hb.next)); + } + + static std::vector + owned_groups(communicator comm, std::span span, halo_bounds hb, bool rev) { + std::vector owned; + + bool should_make_left = hb.next > 0 && (hb.periodic || !(rev ? comm.last() : comm.first())); + bool should_make_right = hb.prev > 0 && (hb.periodic || !(rev ? comm.first() : comm.last())); + + DRLOG("owned groups {}/{} first/last", comm.first(), comm.last()); + if (should_make_left) { + std::cout << "\tnew owned: span=(" << hb.prev << ", " << hb.next + << ") rank=" << (rev ? comm.next() : comm.prev()) + << " tag=" << (rev ? "forward\n" : "reverse\n"); + owned.emplace_back(span.subspan(hb.prev, hb.next), + rev ? comm.next() : comm.prev(), + rev ? 
halo_tag::forward : halo_tag::reverse); + } + if (should_make_right) { + std::cout << "\tnew owned: span=(" << rng::size(span) - (hb.prev + hb.next) << ", " << hb.prev + << ") rank=" << (rev ? comm.prev() : comm.next()) + << " tag=" << (rev ? "reverse\n" : "forward\n"); + owned.emplace_back( + span.subspan(rng::size(span) - (hb.prev + hb.next), hb.prev), + rev ? comm.prev() : comm.next(), + rev ? halo_tag::reverse : halo_tag::forward); + } + return owned; + } + + static std::vector + halo_groups(communicator comm, std::span span, halo_bounds hb, bool rev) { + std::vector halo; + + bool should_make_left = hb.prev > 0 && (hb.periodic || !(rev ? comm.last() : comm.first())); + bool should_make_right = hb.next > 0 && (hb.periodic || !(rev ? comm.first() : comm.last())); + + if (should_make_left) { + std::cout << "\tnew halo: span=(first " << hb.prev + << ") rank=" << (rev ? comm.next() : comm.prev()) + << " tag=" << (rev ? "reverse\n" : "forward\n"); + halo.emplace_back(span.first(hb.prev), + rev ? comm.next() : comm.prev(), + rev ? halo_tag::reverse : halo_tag::forward); + } + if (should_make_right) { + std::cout << "\tnew halo: span=(last " << hb.next + << ") rank=" << (rev ? comm.prev() : comm.next()) + << " tag=" << (rev ? "forward\n" : "reverse\n"); + halo.emplace_back(span.last(hb.next), + rev ? comm.prev() : comm.next(), + rev ? 
halo_tag::forward : halo_tag::reverse); } return halo; } @@ -422,7 +501,7 @@ template > class cyclic_span_halo { public: using group_type = span_group; - using halo_type = span_halo; + using halo_type = dual_span_halo; cyclic_span_halo(const std::vector& halos) : halos_(halos) { From c25edcef29458a4b698256abe19bdbd8a81d547c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 13 Mar 2025 15:45:06 +0100 Subject: [PATCH 092/101] prog --- include/dr/mp/algorithms/for_each.hpp | 10 +-- .../mp/containers/dual_distributed_vector.hpp | 13 ++- include/dr/mp/containers/dual_segment.hpp | 5 -- include/dr/mp/halo.hpp | 12 --- include/dr/mp/views/views.hpp | 1 - test/gtest/mp/halo-dual.cpp | 87 +++++-------------- 6 files changed, 34 insertions(+), 94 deletions(-) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 070b934f80..0770049246 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -28,7 +28,6 @@ void for_each(dual_vector_range auto &&dr, auto op) { } void partial_for_each(dual_vector_range auto &&dr, auto op) { - std::cout << "partial_for_each()" << std::endl; dr::drlog.debug(dr::logger::for_each, "partial_for_each: parallel execution\n"); if (rng::empty(dr)) { return; @@ -76,15 +75,14 @@ void for_each(dr::distributed_range auto &&dr, auto op) { assert(rng::distance(s) > 0); #ifdef SYCL_LANGUAGE_VERSION - // dr::__detail::parallel_for( - // dr::mp::sycl_queue(), sycl::range<1>(rng::distance(s)), - // [first = rng::begin(s), op](auto idx) { op(first[idx]); }) - // .wait(); + dr::__detail::parallel_for( + dr::mp::sycl_queue(), sycl::range<1>(rng::distance(s)), + [first = rng::begin(s), op](auto idx) { op(first[idx]); }) + .wait(); #else assert(false); #endif } else { - std::cout << "for_eaching" << std::endl; dr::drlog.debug(" using cpu\n"); rng::for_each(s, op); } diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp 
b/include/dr/mp/containers/dual_distributed_vector.hpp index 5f838c5b67..59c13fdebf 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -290,18 +290,17 @@ class dual_distributed_vector { data_size_ = segment_size_ + hb.prev + hb.next; - std::cout << "[creating dual_distributed vector]\n" - << "\tsize: " << size << "\n" - << "\tsegment_size_: " << segment_size_ << "\n" - << "\tactual_segment_count_: " << actual_segment_count_ << "\n" - << "\tdata_size_: " << data_size_ << "\n" - << "\thalo: prev=" << hb.prev << " next=" << hb.next << " periodic=" << hb.periodic << "\n"; + // std::cout << "[creating dual_distributed vector]\n" + // << "\tsize: " << size << "\n" + // << "\tsegment_size_: " << segment_size_ << "\n" + // << "\tactual_segment_count_: " << actual_segment_count_ << "\n" + // << "\tdata_size_: " << data_size_ << "\n" + // << "\thalo: prev=" << hb.prev << " next=" << hb.next << " periodic=" << hb.periodic << "\n"; for (std::size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { if (size_ > 0) { datas_.push_back(static_cast(backends_[i].allocate(data_size_ * sizeof(value_type)))); std::memset(datas_[i], 69, data_size_ * sizeof(value_type)); // todo: debug remove later - std::cout << "[creating new halo (" << i << ")]\n"; halos_.push_back(new dual_span_halo(default_comm(), datas_[i], data_size_, hb, i == 1)); } } diff --git a/include/dr/mp/containers/dual_segment.hpp b/include/dr/mp/containers/dual_segment.hpp index 357d688c0f..c45919d9fd 100644 --- a/include/dr/mp/containers/dual_segment.hpp +++ b/include/dr/mp/containers/dual_segment.hpp @@ -178,10 +178,7 @@ template class dual_dv_segment_iterator { ? 
my_process_rank == normalized_segment_index + 1 : my_process_rank + 1 == normalized_segment_index; - std::cout << "my_process_rank, normalized_segment_index = " << my_process_rank << " " << normalized_segment_index << "\n"; - if (is_in_bounds) { - std::cout << "case is_in_bounds\n"; assert(!is_in_halo_prev && !is_in_halo_next); return data + index_ + dv_->distribution_.halo().prev; } @@ -190,7 +187,6 @@ template class dual_dv_segment_iterator { #endif // sliding view needs local iterators that point to the halo if (is_in_halo_prev) { - std::cout << "case is_in_halo_prev\n"; assert(!is_in_bounds && !is_in_halo_next); #ifndef SYCL_LANGUAGE_VERSION assert(index_ <= dv_->distribution_.halo().next); // <= instead of < to cover end() case @@ -199,7 +195,6 @@ template class dual_dv_segment_iterator { } if (is_in_halo_next) { - std::cout << "case is_in_halo_next\n"; assert(!is_in_bounds && !is_in_halo_prev); #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 37244e11e2..3a2557dde7 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -451,17 +451,11 @@ class dual_span_halo : public span_halo_impl { DRLOG("owned groups {}/{} first/last", comm.first(), comm.last()); if (should_make_left) { - std::cout << "\tnew owned: span=(" << hb.prev << ", " << hb.next - << ") rank=" << (rev ? comm.next() : comm.prev()) - << " tag=" << (rev ? "forward\n" : "reverse\n"); owned.emplace_back(span.subspan(hb.prev, hb.next), rev ? comm.next() : comm.prev(), rev ? halo_tag::forward : halo_tag::reverse); } if (should_make_right) { - std::cout << "\tnew owned: span=(" << rng::size(span) - (hb.prev + hb.next) << ", " << hb.prev - << ") rank=" << (rev ? comm.prev() : comm.next()) - << " tag=" << (rev ? "reverse\n" : "forward\n"); owned.emplace_back( span.subspan(rng::size(span) - (hb.prev + hb.next), hb.prev), rev ? 
comm.prev() : comm.next(), @@ -478,17 +472,11 @@ class dual_span_halo : public span_halo_impl { bool should_make_right = hb.next > 0 && (hb.periodic || !(rev ? comm.first() : comm.last())); if (should_make_left) { - std::cout << "\tnew halo: span=(first " << hb.prev - << ") rank=" << (rev ? comm.next() : comm.prev()) - << " tag=" << (rev ? "reverse\n" : "forward\n"); halo.emplace_back(span.first(hb.prev), rev ? comm.next() : comm.prev(), rev ? halo_tag::reverse : halo_tag::forward); } if (should_make_right) { - std::cout << "\tnew halo: span=(last " << hb.next - << ") rank=" << (rev ? comm.prev() : comm.next()) - << " tag=" << (rev ? "forward\n" : "reverse\n"); halo.emplace_back(span.last(hb.next), rev ? comm.prev() : comm.next(), rev ? halo_tag::forward : halo_tag::reverse); diff --git a/include/dr/mp/views/views.hpp b/include/dr/mp/views/views.hpp index a31142548d..44310a7f75 100644 --- a/include/dr/mp/views/views.hpp +++ b/include/dr/mp/views/views.hpp @@ -14,7 +14,6 @@ namespace dr::mp { // segment to local template auto local_segments(R &&dr) { auto is_local = [](const auto &segment) { - std::cout << "local_segments::is_local: [seg rank] " << dr::ranges::rank(segment) << " == " << default_comm().rank() << " [comm rank]" << std::endl; return dr::ranges::rank(segment) == default_comm().rank(); }; // Convert from remote iter to local iter diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index c47430bf55..19477dbc3f 100644 --- a/test/gtest/mp/halo-dual.cpp +++ b/test/gtest/mp/halo-dual.cpp @@ -18,79 +18,45 @@ void local_is_accessible_in_halo_region(const int halo_prev, iota(dv, 0); DRLOG("exchange start"); - std::vector datas = { dv.data(0), dv.data(100) }; - std::cout << "[datas before exchange]\n"; - for (const auto& data: datas) { - for (size_t i = 0; i < dv.data_size(); i++) { - std::cout << data[i] << " "; - } - std::cout << "\n"; - } - - std::cout << "[started exchange]\n"; dv.halo().exchange(); - std::cout << "[finished exchange]\n"; - 
- std::cout << "[datas after exchange]\n"; - for (const auto& data: datas) { - for (size_t i = 0; i < dv.data_size(); i++) { - std::cout << data[i] << " "; - } - std::cout << "\n"; - } - - // 0 1 1 0 - // 012 345 678 9ab - - // 0 1 2 2 1 0 - // 01 23 45 67 89 ab - - // 0 1 2 3 3 2 - // 01 23 45 67 89 ab // arrays below is function depending on size of communicator-1 - std::array first_local_index____; - std::array first_nonlocal_index_; - std::array second_local_index___; - std::array second_nonlocal_index; + std::array first_local_index____; + std::array first_nonlocal_index_; + std::array second_local_index___; + std::array second_nonlocal_index; const int X = 10000; // to mark unused value switch (dr::mp::default_comm().rank()) { case 0: - first_local_index____ = {+0, +0, +0, 0, 0, 0}; - first_nonlocal_index_ = {12, +3, +2, 2, 1, 1}; - second_local_index___ = {+X, +9, 10, X, 0, 0}; - second_nonlocal_index = {+X, 12, 12, X, 1, 1}; + first_local_index____ = {+0, +0, +0, 0}; + first_nonlocal_index_ = {12, +3, +2, 2}; + second_local_index___ = {+X, +9, 10, X}; + second_nonlocal_index = {+X, 12, 12, X}; break; case 1: - first_local_index____ = {X, 3, +2, 2, 2, 1}; - first_nonlocal_index_ = {X, 9, +4, 4, 4, 2}; - second_local_index___ = {X, X, +8, X, 0, 0}; - second_nonlocal_index = {X, X, 10, X, 1, 1}; + first_local_index____ = {X, 3, +2, 2}; + first_nonlocal_index_ = {X, 9, +4, 4}; + second_local_index___ = {X, X, +8, X}; + second_nonlocal_index = {X, X, 10, X}; break; case 2: - first_local_index____ = {X, X, 4, +4, 4, 2}; - first_nonlocal_index_ = {X, X, 8, +6, 6, 3}; - second_local_index___ = {X, X, X, 10, 0, 0}; - second_nonlocal_index = {X, X, X, 12, 1, 1}; + first_local_index____ = {X, X, 4, +4}; + first_nonlocal_index_ = {X, X, 8, +6}; + second_local_index___ = {X, X, X, 10}; + second_nonlocal_index = {X, X, X, 12}; break; case 3: - first_local_index____ = {X, X, X, +6, 6, 3}; - first_nonlocal_index_ = {X, X, X, 10, 6, 4}; - second_local_index___ = {X, X, X, 
+X, 0, 0}; - second_nonlocal_index = {X, X, X, +X, 1, 1}; - break; - case 4: - first_local_index____ = {X, X, X, X, 6, 4}; - first_nonlocal_index_ = {X, X, X, X, 6, 5}; - break; - case 5: - first_local_index____ = {X, X, X, X, X, 5}; - first_nonlocal_index_ = {X, X, X, X, X, 6}; + first_local_index____ = {X, X, X, +6}; + first_nonlocal_index_ = {X, X, X, 10}; + second_local_index___ = {X, X, X, +X}; + second_nonlocal_index = {X, X, X, +X}; break; default: - first_local_index____ = {X, X, X, X, X, X}; - first_nonlocal_index_ = {X, X, X, X, X, X}; + first_local_index____ = {X, X, X, X}; + first_nonlocal_index_ = {X, X, X, X}; + second_local_index___ = {X, X, X, X}; + second_nonlocal_index = {X, X, X, X}; } const auto c = dr::mp::default_comm().size() - 1; @@ -99,15 +65,11 @@ void local_is_accessible_in_halo_region(const int halo_prev, auto second_legal_idx = std::max(0, second_local_index___[c] - halo_prev); auto second_illegal_idx = std::min(12, second_nonlocal_index[c] + halo_next); - std::cout << "first_legal_idx, first_illegal_idx: " << first_legal_idx << " " << first_illegal_idx << "\n"; - std::cout << "second_legal_idx, second_illegal_idx: " << second_legal_idx << " " << second_illegal_idx << "\n"; - DRLOG("checking access to idx between first legal {} and first illegal {}, " "c:{}", first_legal_idx, first_illegal_idx, c); for (int idx = first_legal_idx; idx < first_illegal_idx; ++idx) { - std::cout << "[idx = " << idx << "]\n"; typename DV::value_type *local_ptr = (dv.begin() + idx).local(); EXPECT_TRUE(local_ptr != nullptr); typename DV::value_type value_on_host; @@ -126,7 +88,6 @@ void local_is_accessible_in_halo_region(const int halo_prev, second_legal_idx, second_illegal_idx, c); for (int idx = second_legal_idx; idx < second_illegal_idx; ++idx) { - std::cout << "[idx = " << idx << "]\n"; typename DV::value_type *local_ptr = (dv.begin() + idx).local(); EXPECT_TRUE(local_ptr != nullptr); typename DV::value_type value_on_host; From 
68cadd593c3fe092cc83392bdac9a9c665cf0c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 13 Mar 2025 18:16:59 +0100 Subject: [PATCH 093/101] prog --- include/dr/mp/containers/segment.hpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/dr/mp/containers/segment.hpp b/include/dr/mp/containers/segment.hpp index c967331426..56724ac61b 100644 --- a/include/dr/mp/containers/segment.hpp +++ b/include/dr/mp/containers/segment.hpp @@ -160,16 +160,13 @@ template class dv_segment_iterator { #endif const auto my_process_segment_index = dv_->backend.getrank(); - if (my_process_segment_index == segment_index_) { - std::cout << "case 0\n"; + if (my_process_segment_index == segment_index_) return dv_->data_ + index_ + dv_->distribution_.halo().prev; - } #ifndef SYCL_LANGUAGE_VERSION assert(!dv_->distribution_.halo().periodic); // not implemented #endif // sliding view needs local iterators that point to the halo if (my_process_segment_index + 1 == segment_index_) { - std::cout << "case 1\n"; #ifndef SYCL_LANGUAGE_VERSION assert(index_ <= dv_->distribution_.halo() .next); // <= instead of < to cover end() case @@ -179,7 +176,6 @@ template class dv_segment_iterator { } if (my_process_segment_index == segment_index_ + 1) { - std::cout << "case 2\n"; #ifndef SYCL_LANGUAGE_VERSION assert(dv_->segment_size_ - index_ <= dv_->distribution_.halo().prev); #endif From 7efc1dded8d3512d16f28621af5e8cfaefe2f81a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 13 Mar 2025 18:18:19 +0100 Subject: [PATCH 094/101] prog --- include/dr/mp/algorithms/for_each.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 0770049246..870a65de08 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -18,6 +18,7 @@ namespace dr::mp { +// the concept doesn't work yet... 
for some reason template concept dual_vector_range = dr::distributed_range && requires(R &r) { dr::ranges::segments(r)[0].is_compute(); }; From bfcad1aad905bbeb3396dc259494a8e4dd070c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Mon, 7 Apr 2025 22:29:06 +0200 Subject: [PATCH 095/101] prog --- include/dr/mp/algorithms/for_each.hpp | 15 +- .../mp/containers/dual_distributed_vector.hpp | 6 +- test/gtest/mp/halo-dual.cpp | 143 +++++++++++++----- 3 files changed, 123 insertions(+), 41 deletions(-) diff --git a/include/dr/mp/algorithms/for_each.hpp b/include/dr/mp/algorithms/for_each.hpp index 870a65de08..0bee54b40c 100644 --- a/include/dr/mp/algorithms/for_each.hpp +++ b/include/dr/mp/algorithms/for_each.hpp @@ -33,13 +33,20 @@ void partial_for_each(dual_vector_range auto &&dr, auto op) { if (rng::empty(dr)) { return; } + + auto is_local = [](const auto &segment) { + return dr::ranges::rank(segment) == default_comm().rank(); + }; - for (auto &s : local_segments(dr)) { - if (!s.is_compute()) { - s.swap_state(); + for (auto &seg : dr::ranges::segments(dr) | rng::views::filter(is_local)) { + if (!seg.is_compute()) { + seg.swap_state(); continue; } + auto b = dr::ranges::local(rng::begin(seg)); + auto s = rng::subrange(b, b + rng::distance(seg)); + if (mp::use_sycl()) { dr::drlog.debug(" using sycl\n"); @@ -57,7 +64,7 @@ void partial_for_each(dual_vector_range auto &&dr, auto op) { rng::for_each(s, op); } - s.swap_state(); + seg.swap_state(); } barrier(); } diff --git a/include/dr/mp/containers/dual_distributed_vector.hpp b/include/dr/mp/containers/dual_distributed_vector.hpp index 59c13fdebf..06a1ca578e 100644 --- a/include/dr/mp/containers/dual_distributed_vector.hpp +++ b/include/dr/mp/containers/dual_distributed_vector.hpp @@ -240,7 +240,7 @@ class dual_distributed_vector { auto &halo() const { return *halo_; } auto segments() const { return rng::views::all(segments_); } - // auto segments() { return rng::views::all(segments_); } + auto 
segments() { return rng::views::all(segments_); } __attribute__((unused)) void fence(const std::size_t i) { backends_[i].fence(); } @@ -300,7 +300,7 @@ class dual_distributed_vector { for (std::size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { if (size_ > 0) { datas_.push_back(static_cast(backends_[i].allocate(data_size_ * sizeof(value_type)))); - std::memset(datas_[i], 69, data_size_ * sizeof(value_type)); // todo: debug remove later + std::memset(datas_[i], 69, data_size_ * sizeof(value_type)); halos_.push_back(new dual_span_halo(default_comm(), datas_[i], data_size_, hb, i == 1)); } } @@ -314,7 +314,7 @@ class dual_distributed_vector { } for (size_t i = 0; i < default_comm().size(); i++) { - segments_[i].swap_state(); + segments_[default_comm().size() + i].swap_state(); } for (size_t i = 0; i < DUAL_SEGMENTS_PER_PROC; i++) { diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index 19477dbc3f..1746ce42b6 100644 --- a/test/gtest/mp/halo-dual.cpp +++ b/test/gtest/mp/halo-dual.cpp @@ -8,6 +8,21 @@ template class HaloDual : public testing::Test {}; TYPED_TEST_SUITE(HaloDual, ::testing::Types>); +template +void check_matching(DV &dv, int idx, int expected_value) { + typename DV::value_type *local_ptr = (dv.begin() + idx).local(); + EXPECT_TRUE(local_ptr != nullptr); + typename DV::value_type value_on_host; + + if (dr::mp::use_sycl()) + dr::mp::__detail::sycl_copy(local_ptr, &value_on_host); + else + value_on_host = *local_ptr; + + DRLOG("checking idx:{} expected:{}", idx, expected_value); + EXPECT_EQ(value_on_host, expected_value); +} + template void local_is_accessible_in_halo_region(const int halo_prev, const int halo_next) { @@ -70,17 +85,7 @@ void local_is_accessible_in_halo_region(const int halo_prev, first_legal_idx, first_illegal_idx, c); for (int idx = first_legal_idx; idx < first_illegal_idx; ++idx) { - typename DV::value_type *local_ptr = (dv.begin() + idx).local(); - EXPECT_TRUE(local_ptr != nullptr); - typename DV::value_type 
value_on_host; - - if (dr::mp::use_sycl()) - dr::mp::__detail::sycl_copy(local_ptr, &value_on_host); - else - value_on_host = *local_ptr; - - DRLOG("checking idx:{}", idx); - EXPECT_EQ(value_on_host, idx); + check_matching(dv, idx, idx); } DRLOG("checking access to idx between second legal {} and second illegal {}, " @@ -88,32 +93,10 @@ void local_is_accessible_in_halo_region(const int halo_prev, second_legal_idx, second_illegal_idx, c); for (int idx = second_legal_idx; idx < second_illegal_idx; ++idx) { - typename DV::value_type *local_ptr = (dv.begin() + idx).local(); - EXPECT_TRUE(local_ptr != nullptr); - typename DV::value_type value_on_host; - - if (dr::mp::use_sycl()) - dr::mp::__detail::sycl_copy(local_ptr, &value_on_host); - else - value_on_host = *local_ptr; - - DRLOG("checking idx:{}", idx); - EXPECT_EQ(value_on_host, idx); + check_matching(dv, idx, idx); } DRLOG("checks ok"); - - // although assertions indeed happen, but they are not caught by EXPECT_DEATH - // if (first_illegal_idx < 6) { - // dr::drlog.debug("checking first illegal idx:{} after legal ones\n", - // first_illegal_idx); - // EXPECT_DEATH((dv.begin() + first_illegal_idx).local(), "Assertion.*"); - // } - // if (first_legal_idx > 0) { - // dr::drlog.debug("checking last illegal idx:{} before legal ones\n", - // first_legal_idx - 1); - // EXPECT_DEATH((dv.begin() + first_legal_idx - 1).local(), "Assertion.*"); - // } } TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_11) { @@ -127,3 +110,95 @@ TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_10) { TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_01) { local_is_accessible_in_halo_region(0, 1); } + +template +void local_is_accessible_in_halo_region__partial(const int halo_prev, + const int halo_next) { + + DV dv(12, dr::mp::distribution().halo(halo_prev, halo_next)); + DRLOG("local_is_accessible_in_halo_region TEST START, prev:{}, next:{}", + halo_prev, halo_next); + iota(dv, 0); + DRLOG("exchange start"); + 
+ dv.halo().exchange(); + + // arrays below is function depending on size of communicator-1 + std::array first_segment_begin_; + std::array first_segment_end___; + std::array second_segment_begin; + std::array second_segment_end__; + const int X = 10000; // to mark unused value + + switch (dr::mp::default_comm().rank()) { + case 0: + first_segment_begin_ = {+0, +0, +0, 0}; + first_segment_end___ = {+6, +3, +2, 2}; + second_segment_begin = {+6, +9, 10, X}; + second_segment_end__ = {12, 12, 12, X}; + break; + case 1: + first_segment_begin_ = {X, 3, +2, 2}; + first_segment_end___ = {X, 6, +4, 4}; + second_segment_begin = {X, 6, +8, X}; + second_segment_end__ = {X, 9, 10, X}; + break; + case 2: + first_segment_begin_ = {X, X, 4, +4}; + first_segment_end___ = {X, X, 6, +6}; + second_segment_begin = {X, X, 6, 10}; + second_segment_end__ = {X, X, 8, 12}; + break; + case 3: + first_segment_begin_ = {X, X, X, +6}; + first_segment_end___ = {X, X, X, +8}; + second_segment_begin = {X, X, X, +8}; + second_segment_end__ = {X, X, X, 10}; + break; + default: + first_segment_begin_ = {X, X, X, X}; + first_segment_end___ = {X, X, X, X}; + second_segment_begin = {X, X, X, X}; + second_segment_end__ = {X, X, X, X}; + } + + const auto c = dr::mp::default_comm().size() - 1; + auto first_legal_idx = std::max(0, first_segment_begin_[c] - halo_prev); + auto first_illegal_idx = std::min(12, first_segment_end___[c] + halo_next); + auto second_legal_idx = std::max(0, second_segment_begin[c] - halo_prev); + auto second_illegal_idx = std::min(12, second_segment_end__[c] + halo_next); + + if (first_segment_end___[c] == second_segment_begin[c]) { + // we own the middle segment + first_illegal_idx = std::min(12, first_segment_end___[c]); + second_legal_idx = std::max(0, second_segment_begin[c]); + } + + constexpr size_t N_STEPS = 5; + auto foreach_fn = [](auto&& elem) { elem *= 10; }; + int expected_multiplier = 1; + + for (size_t i = 0; i < N_STEPS; i++) { + expected_multiplier *= 10; + + 
partial_for_each(dv, foreach_fn); + dv.halo().partial_exchange(); + + for (int idx = first_legal_idx; idx < first_illegal_idx; ++idx) { + check_matching(dv, idx, idx * expected_multiplier); + } + + partial_for_each(dv, foreach_fn); + dv.halo().partial_exchange(); + + for (int idx = second_legal_idx; idx < second_illegal_idx; ++idx) { + check_matching(dv, idx, idx * expected_multiplier); + } + } + + DRLOG("checks ok"); +} + +TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_11__partial) { + local_is_accessible_in_halo_region__partial(0, 1); +} From 6a57340eae4d77b2924fb6662fa14d4d6578763d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Wed, 23 Apr 2025 01:29:29 +0200 Subject: [PATCH 096/101] prog --- test/gtest/mp/halo-dual.cpp | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index 1746ce42b6..2107194d5c 100644 --- a/test/gtest/mp/halo-dual.cpp +++ b/test/gtest/mp/halo-dual.cpp @@ -202,3 +202,67 @@ void local_is_accessible_in_halo_region__partial(const int halo_prev, TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_11__partial) { local_is_accessible_in_halo_region__partial(0, 1); } + +// perf test! 
+static constexpr size_t DISTRIBUTED_VECTOR_SIZE = 200000; +static constexpr size_t N_STEPS = 200000; +auto stencil1d_subrange_op = [](auto &center) { + auto win = &center; + return win[-1] + win[0] + win[1]; +}; + +void perf_test_dual() { + dr::mp::dual_distributed_vector dv(DISTRIBUTED_VECTOR_SIZE, dr::mp::distribution().halo(1, 1)); + DRLOG("perf_test_dual TEST START"); + iota(dv, 0); + DRLOG("exchange start"); + + auto start = std::chrono::high_resolution_clock::now(); + + dv.halo().exchange(); + + // auto dv_subrange = rng::subrange(dv.begin() + 1, dv.end() - 1); + + for (size_t i = 0; i < N_STEPS; i++) { + partial_for_each(dv, stencil1d_subrange_op); + dv.halo().partial_exchange(); + + partial_for_each(dv, stencil1d_subrange_op); + dv.halo().partial_exchange(); + } + + auto end = std::chrono::high_resolution_clock::now(); + auto duration = duration_cast(end - start); + std::cout << "\tperf_test_dual time: " << duration.count() << "ms" << std::endl; +} + +void perf_test_classic() { + dr::mp::distributed_vector dv(DISTRIBUTED_VECTOR_SIZE, dr::mp::distribution().halo(1, 1)); + DRLOG("perf_test TEST START"); + iota(dv, 0); + DRLOG("exchange start"); + + auto start = std::chrono::high_resolution_clock::now(); + + dv.halo().exchange(); + + // auto dv_subrange = rng::subrange(dv.begin() + 1, dv.end() - 1); + + for (size_t i = 0; i < N_STEPS; i++) { + for_each(dv, stencil1d_subrange_op); + dv.halo().exchange(); + } + + auto end = std::chrono::high_resolution_clock::now(); + auto duration = duration_cast(end - start); + std::cout << "\tperf_test time: " << duration.count() << "ms" << std::endl; +} + +TYPED_TEST(HaloDual, perf_test_dual_dv) { + perf_test_dual(); +} + +TYPED_TEST(HaloDual, perf_test_classic_dv) { + perf_test_classic(); +} From b9f85f4e959a47e8d7d074d0d9d475421c3324f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 1 May 2025 01:42:44 +0200 Subject: [PATCH 097/101] prog --- test/gtest/mp/halo-dual.cpp | 14 ++++++++++---- 1 file
changed, 10 insertions(+), 4 deletions(-) diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index 2107194d5c..910a2eb1cc 100644 --- a/test/gtest/mp/halo-dual.cpp +++ b/test/gtest/mp/halo-dual.cpp @@ -207,8 +207,10 @@ TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_11__partial) { static constexpr size_t DISTRIBUTED_VECTOR_SIZE = 200000; static constexpr size_t N_STEPS = 200000; +size_t call_count = 0; auto stencil1d_subrange_op = [](auto &center) { auto win = &center; + call_count++; return win[-1] + win[0] + win[1]; }; @@ -220,21 +222,24 @@ void perf_test_dual() { auto start = std::chrono::high_resolution_clock::now(); + call_count = 0; dv.halo().exchange(); // auto dv_subrange = rng::subrange(dv.begin() + 1, dv.end() - 1); for (size_t i = 0; i < N_STEPS; i++) { + dv.halo().partial_exchange_begin(); partial_for_each(dv, stencil1d_subrange_op); - dv.halo().partial_exchange(); + dv.halo().partial_exchange_finalize(); + dv.halo().partial_exchange_begin(); partial_for_each(dv, stencil1d_subrange_op); - dv.halo().partial_exchange(); + dv.halo().partial_exchange_finalize(); } auto end = std::chrono::high_resolution_clock::now(); auto duration = duration_cast(end - start); - std::cout << "\tperf_test_dual time: " << duration.count() << "ms" << std::endl; + std::cout << "perf_test_dual results: \n\ttime: " << duration.count() << "ms \n\tcall_count = " << call_count << std::endl; } void perf_test_classic() { @@ -245,6 +250,7 @@ void perf_test_classic() { auto start = std::chrono::high_resolution_clock::now(); + call_count = 0; dv.halo().exchange(); // auto dv_subrange = rng::subrange(dv.begin() + 1, dv.end() - 1); @@ -256,7 +262,7 @@ void perf_test_classic() { auto end = std::chrono::high_resolution_clock::now(); auto duration = duration_cast(end - start); - std::cout << "\tperf_test time: " << duration.count() << "ms" << std::endl; + std::cout << "perf_test results: \n\ttime: " << duration.count() << "ms \n\tcall_count = " << call_count <<
std::endl; } TYPED_TEST(HaloDual, perf_test_dual_dv) { From c9f663f09c96e96e7afa33b104eb9369f40c9d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 1 May 2025 01:44:26 +0200 Subject: [PATCH 098/101] prog --- test/gtest/mp/halo-dual.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index 910a2eb1cc..a3568c8512 100644 --- a/test/gtest/mp/halo-dual.cpp +++ b/test/gtest/mp/halo-dual.cpp @@ -210,7 +210,7 @@ static constexpr size_t N_STEPS = 200000; size_t call_count = 0; auto stencil1d_subrange_op = [](auto &center) { auto win = &center; - call_count++; + // call_count++; return win[-1] + win[0] + win[1]; }; From 91bd445d7811ebce3fccdd64640692773f0742d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 1 May 2025 01:47:36 +0200 Subject: [PATCH 099/101] prog --- include/dr/mp/halo.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dr/mp/halo.hpp b/include/dr/mp/halo.hpp index 3a2557dde7..283d9d4a68 100644 --- a/include/dr/mp/halo.hpp +++ b/include/dr/mp/halo.hpp @@ -500,7 +500,7 @@ class cyclic_span_halo { void partial_exchange_begin() { halos_[next_comm_index_]->exchange_begin(); - increment_index(); + // increment_index(); } void partial_exchange_finalize() { From f17cb402d382178f0ba57532d4ed5edaa4468244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 1 May 2025 02:04:30 +0200 Subject: [PATCH 100/101] prog --- test/gtest/mp/halo-dual.cpp | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index a3568c8512..ea1f0a7b6a 100644 --- a/test/gtest/mp/halo-dual.cpp +++ b/test/gtest/mp/halo-dual.cpp @@ -205,8 +205,8 @@ TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_11__partial) { // perf test!
-static constexpr size_t DISTRIBUTED_VECTOR_SIZE = 200000; -static constexpr size_t N_STEPS = 200000; +static constexpr size_t DISTRIBUTED_VECTOR_SIZE = 100000; +static constexpr size_t N_STEPS = 100000; size_t call_count = 0; auto stencil1d_subrange_op = [](auto &center) { auto win = &center; @@ -214,6 +214,21 @@ auto stencil1d_subrange_op = [](auto &center) { return win[-1] + win[0] + win[1]; }; +auto stencil1d_subrange_op__heavy = [](auto &center) { + auto win = &center; + auto result = win[-1] + win[0] + win[1]; + + for (int i = 1; i < 10000; i++) { + if (i % 2 == 0) { + result *= i; + } else { + result /= i; + } + } + + return result; +}; + void perf_test_dual() { dr::mp::dual_distributed_vector dv(DISTRIBUTED_VECTOR_SIZE, dr::mp::distribution().halo(1, 1)); DRLOG("perf_test_dual TEST START"); @@ -227,13 +242,9 @@ void perf_test_dual() { // auto dv_subrange = rng::subrange(dv.begin() + 1, dv.end() - 1); - for (size_t i = 0; i < N_STEPS; i++) { - dv.halo().partial_exchange_begin(); - partial_for_each(dv, stencil1d_subrange_op); - dv.halo().partial_exchange_finalize(); - + for (size_t i = 0; i < 2 * N_STEPS; i++) { dv.halo().partial_exchange_begin(); - partial_for_each(dv, stencil1d_subrange_op); + partial_for_each(dv, stencil1d_subrange_op__heavy); dv.halo().partial_exchange_finalize(); } @@ -256,7 +267,7 @@ void perf_test_classic() { // auto dv_subrange = rng::subrange(dv.begin() + 1, dv.end() - 1); for (size_t i = 0; i < N_STEPS; i++) { - for_each(dv, stencil1d_subrange_op); + for_each(dv, stencil1d_subrange_op__heavy); dv.halo().exchange(); } From c3fe0df8c27a48d23fc81d016121c100e7690c08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C5=82=C4=99bocki?= Date: Thu, 1 May 2025 02:05:32 +0200 Subject: [PATCH 101/101] prog --- test/gtest/mp/halo-dual.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/gtest/mp/halo-dual.cpp b/test/gtest/mp/halo-dual.cpp index ea1f0a7b6a..2e9e45fc01 100644 --- a/test/gtest/mp/halo-dual.cpp +++
b/test/gtest/mp/halo-dual.cpp @@ -208,11 +208,11 @@ TYPED_TEST(HaloDual, local_is_accessible_in_halo_region_halo_11__partial) { static constexpr size_t DISTRIBUTED_VECTOR_SIZE = 100000; static constexpr size_t N_STEPS = 100000; size_t call_count = 0; -auto stencil1d_subrange_op = [](auto &center) { - auto win = &center; - // call_count++; - return win[-1] + win[0] + win[1]; -}; +// auto stencil1d_subrange_op = [](auto &center) { +// auto win = &center; +// // call_count++; +// return win[-1] + win[0] + win[1]; +// }; auto stencil1d_subrange_op__heavy = [](auto &center) { auto win = &center;