diff --git a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H index d165ecc01e7..376ccfc9c3e 100644 --- a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H +++ b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H @@ -7,6 +7,65 @@ namespace amrex { /// \cond DOXYGEN_IGNORE namespace detail { + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void forEachIntersectingTile (IntVect const& iv, int nGrow, + Box const& grid_box, IntVect const& periodic_shift, + bool do_tiling, IntVect const& tile_size, F&& f) + { + Box cell_box(iv, iv); + cell_box.grow(nGrow); + cell_box += periodic_shift; + cell_box &= grid_box; + + if (!cell_box.ok()) { return; } + + auto const& cb_lo = cell_box.smallEnd(); + auto const& cb_hi = cell_box.bigEnd(); + +#if (AMREX_SPACEDIM == 1) + for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) { + IntVect cell(AMREX_D_DECL(i, 0, 0)); + Box tbx; + int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx); + IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)), 0, 0)); + if (cell == rep) { + f(tile); + } + } +#elif (AMREX_SPACEDIM == 2) + for (int j = cb_lo[1]; j <= cb_hi[1]; ++j) { + for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) { + IntVect cell(AMREX_D_DECL(i, j, 0)); + Box tbx; + int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx); + IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)), + amrex::max(cb_lo[1], tbx.smallEnd(1)), + 0)); + if (cell == rep) { + f(tile); + } + } + } +#else + for (int k = cb_lo[2]; k <= cb_hi[2]; ++k) { + for (int j = cb_lo[1]; j <= cb_hi[1]; ++j) { + for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) { + IntVect cell(AMREX_D_DECL(i, j, k)); + Box tbx; + int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx); + IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)), + amrex::max(cb_lo[1], tbx.smallEnd(1)), + amrex::max(cb_lo[2], tbx.smallEnd(2)))); + if (cell == rep) { + f(tile); + } + } + } + } +#endif + } + inline Vector getBoundaryBoxes(const Box& box, int ncells) { AMREX_ASSERT_WITH_MESSAGE(box.size() > 2*IntVect(AMREX_D_DECL(ncells, ncells, ncells)), @@ -86,7 +145,6 @@ buildNeighborMask () { int nbor_grid = isec.first; const Box isec_box = isec.second - pshift; - if ( (grid == nbor_grid) && (pshift == 0)) { continue; } neighbor_grids.insert(NeighborTask(nbor_grid, isec_box, pshift)); const int global_rank = dmap[nbor_grid]; neighbor_procs.push_back(ParallelContext::global_to_local_rank(global_rank)); @@ -173,7 +231,7 @@ buildNeighborCopyOp (bool use_boundary_neighbor) const int nisec_box = m_isec_boxes[gid].size(); const bool do_tiling = this->do_tiling; const IntVect tile_size = this->tile_size; - // auto p_code_offsets = m_code_offsets[gid].dataPtr(); + const int nGrow = m_num_neighbor_cells; AMREX_FOR_1D ( np, i, { @@ -186,7 +244,20 @@ buildNeighborCopyOp (bool use_boundary_neighbor) IntVect iv = getParticleCell(p_ptr[pid], plo, dxi, domain); for (int j=0; jdo_tiling == 0, - "Tiling on the GPU is not supported for neighbor particles."); - buildNeighborMask(); this->defineBufferMap(); diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index 3a9f1a670ae..713d649142c 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H +++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -1005,8 +1005,8 @@ selectActualNeighbors (CheckPair const& check_pair, int num_cells) const auto* pstruct = aos().dataPtr(); const auto ptile_data = this->ParticlesAt(lev, pti).getConstParticleTileData(); - Box box = pti.validbox(); - Box grownBox = pti.tilebox(); + Box box = pti.tilebox(); + Box grownBox = box; grownBox.grow(computeRefFac(0, lev).max()*m_num_neighbor_cells); const auto lo = lbound(grownBox); const auto hi = ubound(grownBox); diff --git a/Tests/Particles/NeighborParticles/MDParticleContainer.H b/Tests/Particles/NeighborParticles/MDParticleContainer.H index f6e389e21eb..de977e6232e 100644 --- a/Tests/Particles/NeighborParticles/MDParticleContainer.H +++ b/Tests/Particles/NeighborParticles/MDParticleContainer.H @@ -45,7 +45,7 @@ public: void reset_test_id (); - void checkNeighborParticles (); + void checkNeighborParticles (bool use_source_grid = true); void checkNeighborList (); diff --git a/Tests/Particles/NeighborParticles/MDParticleContainer.cpp b/Tests/Particles/NeighborParticles/MDParticleContainer.cpp index 0a2570f798c..5185487fb55 100644 --- a/Tests/Particles/NeighborParticles/MDParticleContainer.cpp +++ b/Tests/Particles/NeighborParticles/MDParticleContainer.cpp @@ -5,10 +5,60 @@ #include "Constants.H" #include "CheckPair.H" +#include +#include +#include +#include +#include +#include +#include +#include + using namespace amrex; namespace { + using ParticleKey = std::pair; + + struct SourceParticleData + { + ParticleKey key; + IntVect cell; + int grid_id = -1; + std::array pos{}; + }; + + struct PackedSourceParticleData + { + Long id = 0; + int cpu = -1; + int grid_id = -1; + std::array cell{}; + std::array pos{}; + }; + + static_assert(std::is_trivially_copyable_v); + + struct TileParticleRecord + { + ParticleKey key; + int grid_id = -1; + IntVect shift = IntVect(0); + + bool operator< (TileParticleRecord const& other) const + { + return std::tie(key.first, key.second, grid_id, + AMREX_D_DECL(shift[0], shift[1], shift[2])) + < std::tie(other.key.first, other.key.second, other.grid_id, + AMREX_D_DECL(other.shift[0], other.shift[1], other.shift[2])); + } + + bool operator== (TileParticleRecord const& other) const + { + return key == other.key && grid_id == other.grid_id && shift == other.shift; + } + }; + void get_position_unit_cell(Real* r, const IntVect& nppc, int i_part) { int nx = nppc[0]; @@ -41,6 +91,28 @@ namespace u[1] = u_mean + uy_th; u[2] = u_mean + uz_th; } + + auto unpack_source_particles (std::vector const& packed_particles) + -> std::map + { + std::map source_particles; + + for (auto const& packed : packed_particles) { + ParticleKey key{packed.id, packed.cpu}; + SourceParticleData pdata; + pdata.key = key; + pdata.grid_id = packed.grid_id; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + pdata.cell[idim] = packed.cell[idim]; + pdata.pos[idim] = packed.pos[idim]; + } + auto [it, inserted] = source_particles.emplace(key, pdata); + amrex::ignore_unused(it); + AMREX_ALWAYS_ASSERT(inserted); + } + + return source_particles; + } } void @@ -237,55 +309,187 @@ void MDParticleContainer::writeParticles(int n) WriteAsciiFile(pltfile); } -void MDParticleContainer::checkNeighborParticles() +void MDParticleContainer::checkNeighborParticles(bool use_source_grid) { BL_PROFILE("MDParticleContainer::checkNeighborParticles"); const int lev = 0; + auto const& geom = Geom(lev); + auto const dxi = geom.InvCellSizeArray(); + auto const domain = geom.Domain(); + auto const problo = geom.ProbLoArray(); + auto const probhi = geom.ProbHiArray(); + const auto& pshifts = geom.periodicity().shiftIntVect(); auto& plev = GetParticles(lev); auto ngrids = int(ParticleBoxArray(0).size()); + std::vector local_source_particles; - amrex::Gpu::DeviceVector d_num_per_grid(ngrids,0); - amrex::Gpu::HostVector h_num_per_grid(ngrids); - - // CPU version for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { - int gid = mfi.index(); + auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex()); + auto& ptile = plev[index]; + auto const& aos = ptile.GetArrayOfStructs(); + int const np = aos.numParticles(); - if (gid != 0) { continue; } - int tid = mfi.LocalTileIndex(); - auto index = std::make_pair(gid, tid); + if (np == 0) { continue; } + + Gpu::HostVector h_pstruct(np); + Gpu::copy(Gpu::deviceToHost, aos().dataPtr(), aos().dataPtr() + np, h_pstruct.begin()); + + for (int i = 0; i < np; ++i) { + auto const& p = h_pstruct[i]; + PackedSourceParticleData pdata; + pdata.id = p.id(); + pdata.cpu = p.cpu(); + pdata.grid_id = p.idata(0); + auto const cell = getParticleCell(p, problo, dxi, domain); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + pdata.cell[idim] = cell[idim]; + pdata.pos[idim] = p.pos(idim); + } + local_source_particles.push_back(pdata); + } + } + + std::map source_particles; + if (ParallelDescriptor::NProcs() == 1) { + source_particles = unpack_source_particles(local_source_particles); + } else { + int const root = ParallelDescriptor::IOProcessorNumber(); + int const local_nbytes = + static_cast(local_source_particles.size() * sizeof(PackedSourceParticleData)); + + auto recvcounts = ParallelDescriptor::Gather(local_nbytes, root); + std::vector displs; + std::vector packed_bytes; + + if (ParallelDescriptor::MyProc() == root) { + displs.resize(recvcounts.size(), 0); + int offset = 0; + for (int i = 0, n = static_cast(recvcounts.size()); i < n; ++i) { + displs[i] = offset; + offset += recvcounts[i]; + } + packed_bytes.resize(offset); + } + + auto const* send_ptr = reinterpret_cast(local_source_particles.data()); + ParallelDescriptor::Gatherv(send_ptr, local_nbytes, + packed_bytes.data(), recvcounts, displs, root); + + int total_nbytes = static_cast(packed_bytes.size()); + ParallelDescriptor::Bcast(&total_nbytes, 1, root); + if (ParallelDescriptor::MyProc() != root) { + packed_bytes.resize(total_nbytes); + } + if (total_nbytes > 0) { + ParallelDescriptor::Bcast(packed_bytes.data(), total_nbytes, root); + } + + AMREX_ALWAYS_ASSERT(total_nbytes % sizeof(PackedSourceParticleData) == 0); + std::vector global_source_particles( + total_nbytes / static_cast(sizeof(PackedSourceParticleData))); + if (total_nbytes > 0) { + std::memcpy(global_source_particles.data(), packed_bytes.data(), total_nbytes); + } + source_particles = unpack_source_particles(global_source_particles); + } + auto match_shift = [&] (ParticleType const& p, SourceParticleData const& src, + Box const& grown_tile_box) -> IntVect + { + constexpr auto tol = ParticleReal(1.0e-12); + IntVect matched_shift(std::numeric_limits::max()); + int nmatches = 0; + for (auto const& shift : pshifts) { + if (!grown_tile_box.contains(src.cell + shift)) { continue; } + bool matched = true; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + ParticleReal expected_pos = src.pos[idim]; + if (shift[idim] > 0) { + expected_pos += static_cast(probhi[idim] - problo[idim]); + } else if (shift[idim] < 0) { + expected_pos -= static_cast(probhi[idim] - problo[idim]); + } + if (std::abs(p.pos(idim) - expected_pos) > tol) { + matched = false; + break; + } + } + if (matched) { + matched_shift = shift; + ++nmatches; + } + } + AMREX_ALWAYS_ASSERT(nmatches == 1); + return matched_shift; + }; + + for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int const gid = mfi.index(); + int const tid = mfi.LocalTileIndex(); + auto index = std::make_pair(gid, tid); auto& ptile = plev[index]; - auto& aos = ptile.GetArrayOfStructs(); - auto& soa = ptile.GetStructOfArrays(); - const int np = aos.numTotalParticles(); + auto const& aos = ptile.GetArrayOfStructs(); + auto const& soa = ptile.GetStructOfArrays(); - ParticleType* pstruct = aos().dataPtr(); - auto* rdata = soa.GetRealData(0).dataPtr(); - auto* idata = soa.GetIntData(0).dataPtr(); + int const np_total = aos.numTotalParticles(); + if (np_total == 0) { continue; } - int* p_num_per_grid = d_num_per_grid.data(); - amrex::ParallelFor( np, [=] AMREX_GPU_DEVICE (int i) noexcept - { - ParticleType& p1 = pstruct[i]; - Gpu::Atomic::AddNoRet(&(p_num_per_grid[p1.idata(0)]),1); - AMREX_ALWAYS_ASSERT(p1.idata(0) == (int) rdata[i]); - AMREX_ALWAYS_ASSERT(p1.idata(0) == idata[i]); - }); + Gpu::HostVector h_pstruct(np_total); + Gpu::HostVector h_rdata(np_total); + Gpu::HostVector h_idata(np_total); + Gpu::copy(Gpu::deviceToHost, aos().dataPtr(), aos().dataPtr() + np_total, h_pstruct.begin()); + Gpu::copy(Gpu::deviceToHost, soa.GetRealData(0).begin(), soa.GetRealData(0).begin() + np_total, + h_rdata.begin()); + Gpu::copy(Gpu::deviceToHost, soa.GetIntData(0).begin(), soa.GetIntData(0).begin() + np_total, + h_idata.begin()); + + Box grown_tile_box = mfi.tilebox(); + if (hasNeighbors()) { + grown_tile_box.grow(m_num_neighbor_cells); + } - Gpu::copy(Gpu::deviceToHost, - d_num_per_grid.begin(), d_num_per_grid.end(), h_num_per_grid.begin()); - amrex::AllPrintToFile("neighbor_test") << "FOR GRID " << gid << "\n";; + std::vector expected_per_grid(ngrids, 0); + std::vector actual_per_grid(ngrids, 0); + std::vector expected_records; + std::vector actual_records; + + for (auto const& [key, src] : source_particles) { + for (auto const& shift : pshifts) { + if (!grown_tile_box.contains(src.cell + shift)) { continue; } + int const expected_grid = use_source_grid ? src.grid_id : gid; + expected_records.push_back({key, expected_grid, shift}); + ++expected_per_grid[expected_grid]; + } + } + + for (int i = 0; i < np_total; ++i) { + auto const& p = h_pstruct[i]; + ParticleKey key{p.id(), p.cpu()}; + auto it = source_particles.find(key); + AMREX_ALWAYS_ASSERT(it != source_particles.end()); + auto const& src = it->second; + int const expected_grid = use_source_grid ? src.grid_id : gid; + + AMREX_ALWAYS_ASSERT(p.idata(0) == expected_grid); + AMREX_ALWAYS_ASSERT(static_cast(h_rdata[i]) == expected_grid); + AMREX_ALWAYS_ASSERT(h_idata[i] == expected_grid); + + IntVect shift = match_shift(p, src, grown_tile_box); + actual_records.push_back({key, expected_grid, shift}); + ++actual_per_grid[expected_grid]; + } - for (int i = 0; i < ngrids; i++) { - amrex::AllPrintToFile("neighbor_test") << " there are " << h_num_per_grid[i] << " with grid id " << i << "\n"; + for (int igrid = 0; igrid < ngrids; ++igrid) { + AMREX_ALWAYS_ASSERT(actual_per_grid[igrid] == expected_per_grid[igrid]); } - amrex::AllPrintToFile("neighbor_test") << " \n"; - amrex::AllPrintToFile("neighbor_test") << " \n"; + std::sort(expected_records.begin(), expected_records.end()); + std::sort(actual_records.begin(), actual_records.end()); + AMREX_ALWAYS_ASSERT(actual_records == expected_records); } } diff --git a/Tests/Particles/NeighborParticles/inputs b/Tests/Particles/NeighborParticles/inputs index ba0d47c8373..8a76d87ead2 100644 --- a/Tests/Particles/NeighborParticles/inputs +++ b/Tests/Particles/NeighborParticles/inputs @@ -1,11 +1,12 @@ +particles.do_tiling = 1 +particles.tile_size = 4 4 4 + nbor_parts.size = (24, 24, 24) nbor_parts.max_grid_size = 8 nbor_parts.is_periodic = 1 -nbor_parts.num_ppc = 1 nbor_list.size = (24, 24, 24) nbor_list.max_grid_size = 8 nbor_list.is_periodic = 1 -nbor_list.num_ppc = 1 nbor_list.do_plotfile = 1 -nbor_list.check_answer = 1 \ No newline at end of file +nbor_list.check_answer = 1 diff --git a/Tests/Particles/NeighborParticles/main.cpp b/Tests/Particles/NeighborParticles/main.cpp index 0062a91e50f..cfff76cd5e0 100644 --- a/Tests/Particles/NeighborParticles/main.cpp +++ b/Tests/Particles/NeighborParticles/main.cpp @@ -15,12 +15,17 @@ struct TestParams { IntVect size; int max_grid_size; - int num_ppc; int is_periodic; int do_plotfile; int check_answer; }; +namespace +{ + // The test assumes we have 1 ppc when checking the answer + constexpr int fixed_num_ppc = 1; +} + void testNeighborParticles(); void testNeighborList(); @@ -43,7 +48,6 @@ void get_test_params(TestParams& params, const std::string& prefix) ParmParse pp(prefix); pp.get("size", params.size); pp.get("max_grid_size", params.max_grid_size); - pp.get("num_ppc", params.num_ppc); pp.get("is_periodic", params.is_periodic); params.do_plotfile = true; @@ -83,7 +87,7 @@ void testNeighborParticles () const int ncells = 1; MDParticleContainer pc(geom, dm, ba, ncells); - IntVect nppc(params.num_ppc); + IntVect nppc(fixed_num_ppc); if (ParallelDescriptor::MyProc() == dm[0]) { amrex::PrintToFile("neighbor_test") << "About to initialize particles \n"; @@ -118,7 +122,7 @@ void testNeighborParticles () if (ParallelDescriptor::MyProc() == dm[0]) { amrex::PrintToFile("neighbor_test") << "Check neighbors after reset ... \n"; } - pc.checkNeighborParticles(); + pc.checkNeighborParticles(false); if (ParallelDescriptor::MyProc() == dm[0]) { amrex::PrintToFile("neighbor_test") << "Now updateNeighbors again ... \n"; @@ -142,18 +146,21 @@ void testNeighborParticles () amrex::PrintToFile("neighbor_test") << "Moving particles and updating neighbors \n"; pc.moveParticles(static_cast (0.1)); pc.updateNeighbors(); + pc.checkNeighborParticles(); amrex::PrintToFile("neighbor_test") << "Min distance is " << pc.minAndMaxDistance() << ", should be (1, 1) \n"; amrex::PrintToFile("neighbor_test") << "Moving particles and updating neighbors again \n"; pc.moveParticles(static_cast (0.1)); pc.updateNeighbors(); + pc.checkNeighborParticles(); amrex::PrintToFile("neighbor_test") << "Min distance is " << pc.minAndMaxDistance() << ", should be (1, 1) \n"; amrex::PrintToFile("neighbor_test") << "Moving particles and updating neighbors yet again \n"; pc.moveParticles(static_cast (0.1)); pc.updateNeighbors(); + pc.checkNeighborParticles(); amrex::PrintToFile("neighbor_test") << "Min distance is " << pc.minAndMaxDistance() << ", should be (1, 1) \n"; } @@ -188,7 +195,7 @@ void testNeighborList () const int ncells = 1; MDParticleContainer pc(geom, dm, ba, ncells); - IntVect nppc(params.num_ppc); + IntVect nppc(fixed_num_ppc); amrex::PrintToFile("neighbor_test") << "About to initialize particles" << '\n'; @@ -205,7 +212,8 @@ void testNeighborList () pc.clearNeighbors(); pc.fillNeighbors(); pc.selectActualNeighbors(CheckPair()); - pc.updateNeighbors(); + pc.updateNeighbors(true); + pc.checkNeighborParticles(); pc.buildNeighborList(CheckPair()); if (params.check_answer) { pc.checkNeighborList();