Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 93 additions & 14 deletions Src/Particle/AMReX_NeighborParticlesGPUImpl.H
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,65 @@ namespace amrex {
/// \cond DOXYGEN_IGNORE
namespace detail
{
template <typename F>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void forEachIntersectingTile (IntVect const& iv, int nGrow,
Box const& grid_box, IntVect const& periodic_shift,
bool do_tiling, IntVect const& tile_size, F&& f)
{
Box cell_box(iv, iv);
cell_box.grow(nGrow);
cell_box += periodic_shift;
cell_box &= grid_box;

if (!cell_box.ok()) { return; }

auto const& cb_lo = cell_box.smallEnd();
auto const& cb_hi = cell_box.bigEnd();

#if (AMREX_SPACEDIM == 1)
for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) {
IntVect cell(AMREX_D_DECL(i, 0, 0));
Box tbx;
int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx);
IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)), 0, 0));
if (cell == rep) {
f(tile);
}
}
#elif (AMREX_SPACEDIM == 2)
for (int j = cb_lo[1]; j <= cb_hi[1]; ++j) {
for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) {
IntVect cell(AMREX_D_DECL(i, j, 0));
Box tbx;
int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx);
IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)),
amrex::max(cb_lo[1], tbx.smallEnd(1)),
0));
if (cell == rep) {
f(tile);
}
}
}
#else
for (int k = cb_lo[2]; k <= cb_hi[2]; ++k) {
for (int j = cb_lo[1]; j <= cb_hi[1]; ++j) {
for (int i = cb_lo[0]; i <= cb_hi[0]; ++i) {
IntVect cell(AMREX_D_DECL(i, j, k));
Box tbx;
int tile = getTileIndex(cell, grid_box, do_tiling, tile_size, tbx);
IntVect rep(AMREX_D_DECL(amrex::max(cb_lo[0], tbx.smallEnd(0)),
amrex::max(cb_lo[1], tbx.smallEnd(1)),
amrex::max(cb_lo[2], tbx.smallEnd(2))));
if (cell == rep) {
f(tile);
}
}
}
}
#endif
}

inline Vector<Box> getBoundaryBoxes(const Box& box, int ncells)
{
AMREX_ASSERT_WITH_MESSAGE(box.size() > 2*IntVect(AMREX_D_DECL(ncells, ncells, ncells)),
Expand Down Expand Up @@ -86,7 +145,6 @@ buildNeighborMask ()
{
int nbor_grid = isec.first;
const Box isec_box = isec.second - pshift;
if ( (grid == nbor_grid) && (pshift == 0)) { continue; }
neighbor_grids.insert(NeighborTask(nbor_grid, isec_box, pshift));
const int global_rank = dmap[nbor_grid];
neighbor_procs.push_back(ParallelContext::global_to_local_rank(global_rank));
Expand Down Expand Up @@ -173,7 +231,7 @@ buildNeighborCopyOp (bool use_boundary_neighbor)
const int nisec_box = m_isec_boxes[gid].size();
const bool do_tiling = this->do_tiling;
const IntVect tile_size = this->tile_size;
// auto p_code_offsets = m_code_offsets[gid].dataPtr();
const int nGrow = m_num_neighbor_cells;

AMREX_FOR_1D ( np, i,
{
Expand All @@ -186,7 +244,20 @@ buildNeighborCopyOp (bool use_boundary_neighbor)
IntVect iv = getParticleCell(p_ptr[pid], plo, dxi, domain);
for (int j=0; j<nisec_box; ++j) {
if (p_isec_boxes[j].contains(iv)) {
++p_counts[i];
detail::forEachIntersectingTile(iv, nGrow,
p_code_array[j].grid_box,
p_code_array[j].periodic_shift,
do_tiling, tile_size,
[&] (int dst_tile)
{
bool is_self = (p_code_array[j].grid_id == gid) && (dst_tile == tid)
AMREX_D_TERM( && (p_code_array[j].periodic_shift[0] == 0),
&& (p_code_array[j].periodic_shift[1] == 0),
&& (p_code_array[j].periodic_shift[2] == 0));
if (!is_self) {
++p_counts[i];
}
});
}
}
});
Expand Down Expand Up @@ -217,14 +288,25 @@ buildNeighborCopyOp (bool use_boundary_neighbor)
int k = p_offsets[i];
for (int j=0; j<nisec_box; ++j) {
if (p_isec_boxes[j].contains(iv)) {
p_boxes[k] = p_code_array[j].grid_id;
Box tbx;
p_tiles[k] = getTileIndex(iv, p_code_array[j].grid_box,
do_tiling, tile_size, tbx);
p_levs[k] = 0;
p_periodic_shift[k] = p_code_array[j].periodic_shift;
p_src_indices[k] = pid;
++k;
detail::forEachIntersectingTile(iv, nGrow,
p_code_array[j].grid_box,
p_code_array[j].periodic_shift,
do_tiling, tile_size,
[&] (int dst_tile)
{
bool is_self = (p_code_array[j].grid_id == gid) && (dst_tile == tid)
AMREX_D_TERM( && (p_code_array[j].periodic_shift[0] == 0),
&& (p_code_array[j].periodic_shift[1] == 0),
&& (p_code_array[j].periodic_shift[2] == 0));
if (!is_self) {
p_boxes[k] = p_code_array[j].grid_id;
p_tiles[k] = dst_tile;
p_levs[k] = 0;
p_periodic_shift[k] = p_code_array[j].periodic_shift;
p_src_indices[k] = pid;
++k;
}
});
}
}
AMREX_ALWAYS_ASSERT(k == p_offsets[i+1]);
Expand All @@ -243,9 +325,6 @@ fillNeighborsGPU ()

AMREX_ASSERT(numParticlesOutOfRange(*this, 0) == 0);

AMREX_ALWAYS_ASSERT_WITH_MESSAGE(this->do_tiling == 0,
"Tiling on the GPU is not supported for neighbor particles.");

buildNeighborMask();
this->defineBufferMap();

Expand Down
4 changes: 2 additions & 2 deletions Src/Particle/AMReX_NeighborParticlesI.H
Original file line number Diff line number Diff line change
Expand Up @@ -1005,8 +1005,8 @@ selectActualNeighbors (CheckPair const& check_pair, int num_cells)
const auto* pstruct = aos().dataPtr();
const auto ptile_data = this->ParticlesAt(lev, pti).getConstParticleTileData();

Box box = pti.validbox();
Box grownBox = pti.tilebox();
Box box = pti.tilebox();
Box grownBox = box;
grownBox.grow(computeRefFac(0, lev).max()*m_num_neighbor_cells);
const auto lo = lbound(grownBox);
const auto hi = ubound(grownBox);
Expand Down
2 changes: 1 addition & 1 deletion Tests/Particles/NeighborParticles/MDParticleContainer.H
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public:

void reset_test_id ();

void checkNeighborParticles ();
void checkNeighborParticles (bool use_source_grid = true);

void checkNeighborList ();

Expand Down
Loading
Loading