diff --git a/hoomd/mpcd/CMakeLists.txt b/hoomd/mpcd/CMakeLists.txt index 7ae9b10b8d..7761c99879 100644 --- a/hoomd/mpcd/CMakeLists.txt +++ b/hoomd/mpcd/CMakeLists.txt @@ -6,7 +6,6 @@ endif() set(_mpcd_cc_sources module.cc ATCollisionMethod.cc - CellCommunicator.cc CellThermoCompute.cc CellList.cc CollisionMethod.cc @@ -27,7 +26,6 @@ set(_mpcd_headers BounceBackNVE.h BulkGeometry.h BulkStreamingMethod.h - CellCommunicator.h CellThermoCompute.h CellList.h CollisionMethod.h @@ -88,7 +86,6 @@ if(ENABLE_HIP) BounceBackStreamingMethodGPU.cuh BounceBackStreamingMethodGPU.h BulkStreamingMethodGPU.h - CellCommunicator.cuh CellThermoComputeGPU.cuh CellThermoComputeGPU.h CellListGPU.cuh diff --git a/hoomd/mpcd/CellCommunicator.cc b/hoomd/mpcd/CellCommunicator.cc deleted file mode 100644 index ee52380b1c..0000000000 --- a/hoomd/mpcd/CellCommunicator.cc +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellCommunicator.cc - * \brief Definition of mpcd::CellCommunicator - */ - -#ifdef ENABLE_MPI - -#include "CellCommunicator.h" - -namespace hoomd - { -// initialize with zero instances of the communicator -unsigned int mpcd::CellCommunicator::num_instances = 0; - -/*! - * \param sysdef System definition - * \param cl MPCD cell list - */ -mpcd::CellCommunicator::CellCommunicator(std::shared_ptr sysdef, - std::shared_ptr cl) - : m_id(num_instances++), m_sysdef(sysdef), m_pdata(sysdef->getParticleData()), - m_exec_conf(m_pdata->getExecConf()), m_mpi_comm(m_exec_conf->getMPICommunicator()), - m_decomposition(m_pdata->getDomainDecomposition()), m_cl(cl), m_communicating(false), - m_send_buf(m_exec_conf), m_recv_buf(m_exec_conf), m_needs_init(true) - { - m_exec_conf->msg->notice(5) << "Constructing MPCD CellCommunicator" << std::endl; -#ifdef ENABLE_HIP - if (m_exec_conf->isCUDAEnabled()) - { - m_tuner_pack.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_comm_pack_" + std::to_string(m_id))); - m_tuner_unpack.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_comm_unpack_" + std::to_string(m_id))); - m_autotuners.insert(m_autotuners.end(), {m_tuner_pack, m_tuner_unpack}); - } -#endif // ENABLE_HIP - - m_cl->getSizeChangeSignal().connect( - this); - } - -mpcd::CellCommunicator::~CellCommunicator() - { - m_exec_conf->msg->notice(5) << "Destroying MPCD CellCommunicator" << std::endl; - m_cl->getSizeChangeSignal() - .disconnect(this); - } - -namespace mpcd - { -namespace detail - { -//! Unary operator to wrap global cell indexes into the local domain -struct LocalCellWrapOp - { - LocalCellWrapOp(std::shared_ptr cl_) - : cl(cl_), ci(cl_->getCellIndexer()), gci(cl_->getGlobalCellIndexer()) - { - } - - //! Transform the global 1D cell index into a local 1D cell index - inline unsigned int operator()(unsigned int cell_idx) - { - // convert the 1D global cell index to a global cell tuple - const uint3 cell = gci.getTriple(cell_idx); - - // convert the global cell tuple to a local cell tuple - int3 local_cell = cl->getLocalCell(make_int3(cell.x, cell.y, cell.z)); - - // wrap the local cell through the global boundaries, which should work for all reasonable - // cell comms. - if (local_cell.x >= (int)gci.getW()) - local_cell.x -= gci.getW(); - else if (local_cell.x < 0) - local_cell.x += gci.getW(); - - if (local_cell.y >= (int)gci.getH()) - local_cell.y -= gci.getH(); - else if (local_cell.y < 0) - local_cell.y += gci.getH(); - - if (local_cell.z >= (int)gci.getD()) - local_cell.z -= gci.getD(); - else if (local_cell.z < 0) - local_cell.z += gci.getD(); - - // convert the local cell tuple back to an index - return ci(local_cell.x, local_cell.y, local_cell.z); - } - - std::shared_ptr cl; //!< Cell list - const Index3D ci; //!< Cell indexer - const Index3D gci; //!< Global cell indexer - }; - } // end namespace detail - } // end namespace mpcd - -void mpcd::CellCommunicator::initialize() - { - // obtain domain decomposition - const Index3D& di = m_decomposition->getDomainIndexer(); - ArrayHandle h_cart_ranks(m_decomposition->getCartRanks(), - access_location::host, - access_mode::read); - const uint3 my_pos = m_decomposition->getGridPos(); - - // use the cell list to compute the bounds - const Index3D& ci = m_cl->getCellIndexer(); - const Index3D& global_ci = m_cl->getGlobalCellIndexer(); - auto num_comm_cells = m_cl->getNComm(); - const uint3 max_lo - = make_uint3(num_comm_cells[static_cast(mpcd::detail::face::west)], - num_comm_cells[static_cast(mpcd::detail::face::south)], - num_comm_cells[static_cast(mpcd::detail::face::down)]); - const uint3 min_hi = make_uint3( - ci.getW() - num_comm_cells[static_cast(mpcd::detail::face::east)], - ci.getH() - num_comm_cells[static_cast(mpcd::detail::face::north)], - ci.getD() - num_comm_cells[static_cast(mpcd::detail::face::up)]); - - // check to make sure box is not overdecomposed - { - const unsigned int nextra = m_cl->getNExtraCells(); - unsigned int err = ((max_lo.x + nextra) > min_hi.x || (max_lo.y + nextra) > min_hi.y - || (max_lo.z + nextra) > min_hi.z); - MPI_Allreduce(MPI_IN_PLACE, &err, 1, MPI_UNSIGNED, MPI_MAX, m_mpi_comm); - if (err) - { - m_exec_conf->msg->error() - << "mpcd: Simulation box is overdecomposed, decrease the number of ranks." - << std::endl; - throw std::runtime_error("Simulation box is overdecomposed for MPCD"); - } - } - - // loop over all cells in the grid and determine where to send them - std::multimap send_map; - std::set neighbors; - for (unsigned int k = 0; k < ci.getD(); ++k) - { - for (unsigned int j = 0; j < ci.getH(); ++j) - { - for (unsigned int i = 0; i < ci.getW(); ++i) - { - // skip any cells interior to the grid, which will not be communicated - // this is wasteful loop logic, but initialize will only be called rarely - if (i >= max_lo.x && i < min_hi.x && j >= max_lo.y && j < min_hi.y && k >= max_lo.z - && k < min_hi.z) - continue; - - // obtain the 1D global index of this cell - const int3 global_cell = m_cl->getGlobalCell(make_int3(i, j, k)); - const unsigned int global_cell_idx - = global_ci(global_cell.x, global_cell.y, global_cell.z); - - // check which direction the cell lies off rank in x,y,z - std::vector dx = {0}; - if (i < max_lo.x) - dx.push_back(-1); - else if (i >= min_hi.x) - dx.push_back(1); - - std::vector dy = {0}; - if (j < max_lo.y) - dy.push_back(-1); - else if (j >= min_hi.y) - dy.push_back(1); - - std::vector dz = {0}; - if (k < max_lo.z) - dz.push_back(-1); - else if (k >= min_hi.z) - dz.push_back(1); - - // generate all permutations of these neighbors for the cell - for (auto ddx = dx.begin(); ddx != dx.end(); ++ddx) - { - for (auto ddy = dy.begin(); ddy != dy.end(); ++ddy) - { - for (auto ddz = dz.begin(); ddz != dz.end(); ++ddz) - { - // skip self - if (*ddx == 0 && *ddy == 0 && *ddz == 0) - continue; - - // get neighbor rank tuple - int3 neigh = make_int3((int)my_pos.x + *ddx, - (int)my_pos.y + *ddy, - (int)my_pos.z + *ddz); - - // wrap neighbor through the boundaries - if (neigh.x < 0) - neigh.x += di.getW(); - else if (neigh.x >= (int)di.getW()) - neigh.x -= di.getW(); - - if (neigh.y < 0) - neigh.y += di.getH(); - else if (neigh.y >= (int)di.getH()) - neigh.y -= di.getH(); - - if (neigh.z < 0) - neigh.z += di.getD(); - else if (neigh.z >= (int)di.getD()) - neigh.z -= di.getD(); - - // convert neighbor to a linear rank and push it into the unique - // neighbor set - const unsigned int neigh_rank - = h_cart_ranks.data[di(neigh.x, neigh.y, neigh.z)]; - neighbors.insert(neigh_rank); - send_map.insert(std::make_pair(neigh_rank, global_cell_idx)); - } // ddz - } // ddy - } // ddx - } // i - } // j - } // k - - // allocate send / receive index arrays - { - GPUArray send_idx(send_map.size(), m_exec_conf); - m_send_idx.swap(send_idx); - } - - // fill the send indexes with the global values - // flood the array of unique neighbors and count the number to send - { - ArrayHandle h_send_idx(m_send_idx, - access_location::host, - access_mode::overwrite); - unsigned int idx = 0; - for (auto it = send_map.begin(); it != send_map.end(); ++it) - { - h_send_idx.data[idx++] = it->second; - } - - m_neighbors.resize(neighbors.size()); - m_begin.resize(m_neighbors.size()); - m_num_send.resize(m_neighbors.size()); - idx = 0; - for (auto it = neighbors.begin(); it != neighbors.end(); ++it) - { - auto lower = send_map.lower_bound(*it); - auto upper = send_map.upper_bound(*it); - - m_neighbors[idx] = *it; - m_begin[idx] = (unsigned int)std::distance(send_map.begin(), lower); - m_num_send[idx] = (unsigned int)std::distance(lower, upper); - ++idx; - } - } - - // send / receive the global cell indexes to be communicated with neighbors - std::vector recv_idx(m_send_idx.getNumElements()); - { - ArrayHandle h_send_idx(m_send_idx, access_location::host, access_mode::read); - - m_reqs.resize(2 * m_neighbors.size()); - for (unsigned int idx = 0; idx < m_neighbors.size(); ++idx) - { - const unsigned int offset = m_begin[idx]; - MPI_Isend(h_send_idx.data + offset, - m_num_send[idx], - MPI_INT, - m_neighbors[idx], - 0, - m_mpi_comm, - &m_reqs[2 * idx]); - MPI_Irecv(recv_idx.data() + offset, - m_num_send[idx], - MPI_INT, - m_neighbors[idx], - 0, - m_mpi_comm, - &m_reqs[2 * idx + 1]); - } - MPI_Waitall((unsigned int)m_reqs.size(), m_reqs.data(), MPI_STATUSES_IGNORE); - } - - // transform all of the global cell indexes back into local cell indexes - { - ArrayHandle h_send_idx(m_send_idx, - access_location::host, - access_mode::readwrite); - - mpcd::detail::LocalCellWrapOp wrapper(m_cl); - std::transform(h_send_idx.data, - h_send_idx.data + m_send_idx.getNumElements(), - h_send_idx.data, - wrapper); - std::transform(recv_idx.begin(), recv_idx.end(), recv_idx.begin(), wrapper); - } - - // map the received cells from a rank-basis to a cell-basis - { - std::multimap cell_map; - std::set unique_cells; - for (unsigned int idx = 0; idx < recv_idx.size(); ++idx) - { - const unsigned int cell = recv_idx[idx]; - unique_cells.insert(cell); - cell_map.insert(std::make_pair(cell, idx)); - } - m_num_cells = (unsigned int)unique_cells.size(); - - /* - * Allocate auxiliary memory for receiving cell reordering - */ - { - GPUArray recv(recv_idx.size(), m_exec_conf); - m_recv.swap(recv); - - GPUArray cells(m_num_cells, m_exec_conf); - m_cells.swap(cells); - - GPUArray recv_begin(m_num_cells, m_exec_conf); - m_recv_begin.swap(recv_begin); - - GPUArray recv_end(m_num_cells, m_exec_conf); - m_recv_end.swap(recv_end); - } - - /* - * Generate the compacted list of unique cells - */ - ArrayHandle h_cells(m_cells, access_location::host, access_mode::overwrite); - unsigned int idx = 0; - for (auto it = unique_cells.begin(); it != unique_cells.end(); ++it) - { - h_cells.data[idx++] = *it; - } - - /* - * Loop over the cell map to do run-length encoding on the keys. This - * determines the range of data belonging to each received cell. - */ - ArrayHandle h_recv(m_recv, access_location::host, access_mode::overwrite); - ArrayHandle h_recv_begin(m_recv_begin, - access_location::host, - access_mode::overwrite); - ArrayHandle h_recv_end(m_recv_end, - access_location::host, - access_mode::overwrite); - unsigned int last_cell = UINT_MAX; - unsigned int cell_idx = 0; - idx = 0; - h_recv_begin.data[cell_idx] = idx; - for (auto it = cell_map.begin(); it != cell_map.end(); ++it) - { - // record the sorted receive index - h_recv.data[idx] = it->second; - - // if not very first pass and the current cell does not match the - // last cell, then we are on a new cell, and need to demark an end / begin - if (last_cell != UINT_MAX && it->first != last_cell) - { - h_recv_end.data[cell_idx] = idx; - h_recv_begin.data[++cell_idx] = idx; - } - last_cell = it->first; - - ++idx; - } - h_recv_end.data[cell_idx] = idx; - } - } - } // end namespace hoomd - -#endif // ENABLE_MPI diff --git a/hoomd/mpcd/CellCommunicator.cuh b/hoomd/mpcd/CellCommunicator.cuh deleted file mode 100644 index 3bba09f5f6..0000000000 --- a/hoomd/mpcd/CellCommunicator.cuh +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellCommunicator.cuh - * \brief Declaration of CUDA kernels for mpcd::CellCommunicator - */ - -#ifndef MPCD_CELL_COMMUNICATOR_CUH_ -#define MPCD_CELL_COMMUNICATOR_CUH_ - -#include "hoomd/HOOMDMath.h" -#include "hoomd/Index1D.h" - -#include - -namespace hoomd - { -namespace mpcd - { -namespace gpu - { -//! Kernel driver to pack cell communication buffer -template -cudaError_t __attribute__((visibility("default"))) -pack_cell_buffer(typename PackOpT::element* d_send_buf, - const T* d_props, - const unsigned int* d_send_idx, - const PackOpT op, - const unsigned int num_send, - unsigned int block_size); - -//! Kernel driver to unpack cell communication buffer -template -cudaError_t __attribute__((visibility("default"))) -unpack_cell_buffer(T* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename PackOpT::element* d_recv_buf, - const PackOpT op, - const unsigned int num_cells, - const unsigned int block_size); - -#ifdef __HIPCC__ - -namespace kernel - { -//! Kernel to pack cell communication buffer -/*! - * \param d_send_buf Send buffer to pack (output) - * \param d_props Cell property to pack - * \param d_send_idx Cell indexes to pack into the buffer - * \param op Pack operator - * \param num_send Number of cells to pack - * \param block_size Number of threads per block - * - * \tparam T Type of data to pack (inferred) - * \tparam PackOpT Pack operation type (inferred) - * - * \b Implementation details: - * Using one thread per cell to pack, each cell to send has its properties read - * and packed into the send buffer. - */ -template -__global__ void pack_cell_buffer(typename PackOpT::element* d_send_buf, - const T* d_props, - const unsigned int* d_send_idx, - const PackOpT op, - const unsigned int num_send) - { - // one thread per buffer cell - unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= num_send) - return; - - const unsigned int send_idx = d_send_idx[idx]; - d_send_buf[idx] = op.pack(d_props[send_idx]); - } - -//! Kernel to unpack cell communication buffer -/*! - * \param d_props Cell property array to unpack into - * \param d_cells List of unique cells to unpack - * \param d_recv Map of unique cells onto the receive buffer - * \param d_recv_begin Beginning index into \a d_recv for cells to unpack - * \param d_recv_end End index (exclusive) into \a d_recv for cells to unpack - * \param d_recv_buf Received buffer from neighbor ranks - * \param op Packing operator - * \param num_cells Number of cells to unpack - * - * \tparam T Data type to unpack (inferred) - * \tparam PackOpT Pack operation type (inferred) - * - * \b Implementation details: - * Using one thread per cell to unpack, each cell iterates over the cells it - * has received, and applies the unpacking operator to each in turn. - */ -template -__global__ void unpack_cell_buffer(T* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename PackOpT::element* d_recv_buf, - const PackOpT op, - const unsigned int num_cells) - { - // one thread per particle - unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= num_cells) - return; - - const unsigned int cell_idx = d_cells[idx]; - const unsigned int begin = d_recv_begin[idx]; - const unsigned int end = d_recv_end[idx]; - - // loop through all received data for this cell, and unpack it iteratively - T val = d_props[cell_idx]; - for (unsigned int i = begin; i < end; ++i) - { - val = op.unpack(d_recv_buf[d_recv[i]], val); - } - - // save the accumulated unpacked value - d_props[cell_idx] = val; - } - - } // end namespace kernel - -/*! - * \param d_send_buf Send buffer to pack (output) - * \param d_props Cell property to pack - * \param d_send_idx Cell indexes to pack into the buffer - * \param op Pack operator - * \param num_send Number of cells to pack - * \param block_size Number of threads per block - * - * \tparam T Type of data to pack (inferred) - * \tparam PackOpT Pack operator type - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::pack_cell_buffer - */ -template -cudaError_t pack_cell_buffer(typename PackOpT::element* d_send_buf, - const T* d_props, - const unsigned int* d_send_idx, - const PackOpT op, - const unsigned int num_send, - unsigned int block_size) - { - // determine runtime block size - unsigned int max_block_size; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::pack_cell_buffer); - max_block_size = attr.maxThreadsPerBlock; - - const unsigned int run_block_size = min(block_size, max_block_size); - - dim3 grid(num_send / run_block_size + 1); - mpcd::gpu::kernel::pack_cell_buffer<<>>(d_send_buf, - d_props, - d_send_idx, - op, - num_send); - - return cudaSuccess; - } - -/*! - * \param d_props Cell property array to unpack into - * \param d_cells List of unique cells to unpack - * \param d_recv Map of unique cells onto the receive buffer - * \param d_recv_begin Beginning index into \a d_recv for cells to unpack - * \param d_recv_end End index (exclusive) into \a d_recv for cells to unpack - * \param d_recv_buf Received buffer from neighbor ranks - * \param op Packing operator - * \param num_cells Number of cells to unpack - * - * \tparam T Data type to unpack (inferred) - * \tparam PackOpT Pack operation type (inferred) - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::unpack_cell_buffer - */ -template -cudaError_t unpack_cell_buffer(T* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename PackOpT::element* d_recv_buf, - const PackOpT op, - const unsigned int num_cells, - const unsigned int block_size) - { - // determine runtime block size - unsigned int max_block_size; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::unpack_cell_buffer); - max_block_size = attr.maxThreadsPerBlock; - - const unsigned int run_block_size = min(block_size, max_block_size); - - dim3 grid(num_cells / run_block_size + 1); - mpcd::gpu::kernel::unpack_cell_buffer<<>>(d_props, - d_cells, - d_recv, - d_recv_begin, - d_recv_end, - d_recv_buf, - op, - num_cells); - - return cudaSuccess; - } -#endif // __HIPCC__ - - } // end namespace gpu - } // end namespace mpcd - } // end namespace hoomd - -#endif // MPCD_CELL_COMMUNICATOR_CUH_ diff --git a/hoomd/mpcd/CellCommunicator.h b/hoomd/mpcd/CellCommunicator.h deleted file mode 100644 index ea78f0f8d8..0000000000 --- a/hoomd/mpcd/CellCommunicator.h +++ /dev/null @@ -1,437 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellCommunicator.h - * \brief Declares and defines the mpcd::CellCommunicator class - */ - -#ifdef ENABLE_MPI - -#ifndef MPCD_CELL_COMMUNICATOR_H_ -#define MPCD_CELL_COMMUNICATOR_H_ - -#ifdef __HIPCC__ -#error This header cannot be compiled by nvcc -#endif - -#ifdef ENABLE_HIP -#include "CellCommunicator.cuh" -#endif // ENABLE_HIP - -#include "CellList.h" -#include "CommunicatorUtilities.h" - -#include "hoomd/Autotuned.h" -#include "hoomd/DomainDecomposition.h" -#include "hoomd/GPUArray.h" -#include "hoomd/GPUVector.h" -#include "hoomd/HOOMDMath.h" -#include "hoomd/Index1D.h" -#include "hoomd/SystemDefinition.h" - -#include -#include - -namespace hoomd - { -namespace mpcd - { -//! Communicates properties across the MPCD cell list -class PYBIND11_EXPORT CellCommunicator : public Autotuned - { - public: - //! Constructor - CellCommunicator(std::shared_ptr sysdef, std::shared_ptr cl); - - //! Destructor - virtual ~CellCommunicator(); - - //! Reduce cell list properties - /*! - * \param props Properties to reduce - * \param op Binary reduction operator - * - * Properties are reduced across domain boundaries, as marked in the cell list. - */ - template void communicate(const GPUArray& props, const PackOpT op) - { - begin(props, op); - finalize(props, op); - } - - //! Begin communication of the grid - template void begin(const GPUArray& props, const PackOpT op); - - //! Finalize communication of the grid - template void finalize(const GPUArray& props, const PackOpT op); - - //! Get the number of unique cells with communication - unsigned int getNCells() - { - if (m_needs_init) - { - initialize(); - m_needs_init = false; - } - return m_num_cells; - } - - //! Get the list of unique cells with communication - const GPUArray& getCells() - { - if (m_needs_init) - { - initialize(); - m_needs_init = false; - } - return m_cells; - } - - private: - static unsigned int num_instances; //!< Number of communicator instances - const unsigned int m_id; //!< Id for this communicator to use in tags - - std::shared_ptr m_sysdef; //!< System definition - std::shared_ptr m_pdata; //!< HOOMD particle data - std::shared_ptr m_exec_conf; //!< Execution configuration - const MPI_Comm m_mpi_comm; //!< MPI Communicator - std::shared_ptr m_decomposition; //!< Domain decomposition - - std::shared_ptr m_cl; //!< MPCD cell list - - bool m_communicating; //!< Flag if communication is occurring - GPUVector m_send_buf; //!< Send buffer - GPUVector m_recv_buf; //!< Receive buffer - GPUArray m_send_idx; //!< Indexes of cells in send buffer - std::vector m_reqs; //!< MPI request objects - - std::vector m_neighbors; //!< Unique neighbor ranks - std::vector m_begin; //!< Begin offset of every neighbor - std::vector m_num_send; //!< Number of cells to send to every neighbor - - unsigned int m_num_cells; //!< Number of unique cells to receive - GPUArray m_cells; //!< Unique cells to receive - GPUArray - m_recv; //!< Reordered mapping of buffer from ranks to group received cells together - GPUArray m_recv_begin; //!< Begin offset of every unique cell - GPUArray m_recv_end; //!< End offset of every unique cell - - bool m_needs_init; //!< Flag if grid needs to be initialized - //! Slot that communicator needs to be reinitialized - void slotInit() - { - m_needs_init = true; - } - - //! Initialize the grid - void initialize(); - - //! Packs the property buffer - template void packBuffer(const GPUArray& props, const PackOpT op); - - //! Unpacks the property buffer - template - void unpackBuffer(const GPUArray& props, const PackOpT op); - -#ifdef ENABLE_HIP - std::shared_ptr> m_tuner_pack; //!< Tuner for pack kernel - std::shared_ptr> m_tuner_unpack; //!< Tuner for unpack kernel - - //! Packs the property buffer on the GPU - template - void packBufferGPU(const GPUArray& props, const PackOpT op); - - //! Unpacks the property buffer on the GPU - template - void unpackBufferGPU(const GPUArray& props, const PackOpT op); -#endif // ENABLE_HIP - }; - - } // end namespace mpcd - -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the send buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The data in \a props is packed into the send buffers, and nonblocking MPI - * send / receive operations are initiated. If communication is already occurring, - * the method returns immediately and no action is taken. - */ -template -void mpcd::CellCommunicator::begin(const GPUArray& props, const PackOpT op) - { - // check if communication is occurring and place a lock on - if (m_communicating) - return; - m_communicating = true; - - // ensure that the property grid is sufficiently sized compared to the cell list - if (props.getNumElements() < m_cl->getNCells()) - { - m_exec_conf->msg->error() - << "mpcd: cell property to be reduced is smaller than cell list dimensions" - << std::endl; - throw std::runtime_error("MPCD cell property has insufficient dimensions"); - } - - // initialize grid if required - if (m_needs_init) - { - initialize(); - m_needs_init = false; - } - - // resize send / receive buffers for this comm element - m_send_buf.resize(m_send_idx.getNumElements() * sizeof(typename PackOpT::element)); - m_recv_buf.resize(m_send_idx.getNumElements() * sizeof(typename PackOpT::element)); - -#ifdef ENABLE_HIP - if (m_exec_conf->isCUDAEnabled()) - { - packBufferGPU(props, op); - } - else -#endif // ENABLE_HIP - { - packBuffer(props, op); - } - - // make the MPI calls - { - // determine whether to use CPU or GPU CUDA buffers - access_location::Enum mpi_loc; - { - mpi_loc = access_location::host; - } - - ArrayHandle h_send_buf(m_send_buf, mpi_loc, access_mode::read); - ArrayHandle h_recv_buf(m_recv_buf, mpi_loc, access_mode::overwrite); - typename PackOpT::element* send_buf - = reinterpret_cast(h_send_buf.data); - typename PackOpT::element* recv_buf - = reinterpret_cast(h_recv_buf.data); - - m_reqs.resize(2 * m_neighbors.size()); - for (unsigned int idx = 0; idx < m_neighbors.size(); ++idx) - { - const unsigned int neigh = m_neighbors[idx]; - const unsigned int offset = m_begin[idx]; - const size_t num_bytes = sizeof(typename PackOpT::element) * m_num_send[idx]; - MPI_Isend(send_buf + offset, - (unsigned int)num_bytes, - MPI_BYTE, - neigh, - 0, - m_mpi_comm, - &m_reqs[2 * idx]); - MPI_Irecv(recv_buf + offset, - (unsigned int)num_bytes, - MPI_BYTE, - neigh, - 0, - m_mpi_comm, - &m_reqs[2 * idx + 1]); - } - } - } - -//! Finalize communication of the grid -/*! - * \param props Property buffer to unpack - * \param op Packing operator to apply to the receive buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * Existing MPI requests are finalized, and then the received buffers are - * unpacked into \a props. If no communication is ongoing, the method immediately - * returns and takes no action. - */ -template -void mpcd::CellCommunicator::finalize(const GPUArray& props, const PackOpT op) - { - if (!m_communicating) - return; - - // finish all MPI requests - MPI_Waitall((unsigned int)m_reqs.size(), m_reqs.data(), MPI_STATUSES_IGNORE); - -// unpack the buffer -#ifdef ENABLE_HIP - if (m_exec_conf->isCUDAEnabled()) - { - unpackBufferGPU(props, op); - } - else -#endif // ENABLE_HIP - { - unpackBuffer(props, op); - } - - m_communicating = false; - } - -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the send buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator applies a unary pack functor to \a props, which permits - * a reduction or transformation of the data to be sent. See mpcd::detail::CellEnergyPackOp - * for an example. - * - * \post Communicated cells in \a props are packed into \a m_send_buf. - */ -template -void mpcd::CellCommunicator::packBuffer(const GPUArray& props, const PackOpT op) - { - ArrayHandle h_props(props, access_location::host, access_mode::read); - ArrayHandle h_send_idx(m_send_idx, access_location::host, access_mode::read); - ArrayHandle h_send_buf(m_send_buf, - access_location::host, - access_mode::overwrite); - typename PackOpT::element* send_buf - = reinterpret_cast(h_send_buf.data); - - for (unsigned int idx = 0; idx < m_send_idx.getNumElements(); ++idx) - { - send_buf[idx] = op.pack(h_props.data[h_send_idx.data[idx]]); - } - } - -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the received buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator must be associative so that the order it is applied - * to the received cells does not matter, e.g., addition. - * - * \post The bytes from \a m_recv_buf are unpacked into \a props. - */ -template -void mpcd::CellCommunicator::unpackBuffer(const GPUArray& props, const PackOpT op) - { - ArrayHandle h_recv(m_recv, access_location::host, access_mode::read); - ArrayHandle h_cells(m_cells, access_location::host, access_mode::read); - ArrayHandle h_recv_begin(m_recv_begin, access_location::host, access_mode::read); - ArrayHandle h_recv_end(m_recv_end, access_location::host, access_mode::read); - - ArrayHandle h_recv_buf(m_recv_buf, access_location::host, access_mode::read); - typename PackOpT::element* recv_buf - = reinterpret_cast(h_recv_buf.data); - - ArrayHandle h_props(props, access_location::host, access_mode::readwrite); - - for (unsigned int idx = 0; idx < m_num_cells; ++idx) - { - const unsigned int cell_idx = h_cells.data[idx]; - const unsigned int begin = h_recv_begin.data[idx]; - const unsigned int end = h_recv_end.data[idx]; - - // loop through all received data for this cell, and unpack it iteratively - T val = h_props.data[cell_idx]; - for (unsigned int i = begin; i < end; ++i) - { - val = op.unpack(recv_buf[h_recv.data[i]], val); - } - - // save the accumulated unpacked value - h_props.data[cell_idx] = val; - } - } - -#ifdef ENABLE_HIP -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the send buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator applies a unary pack functor to \a props, which permits - * a reduction or transformation of the data to be sent. See mpcd::detail::CellEnergyPackOp - * for an example. - * - * \post Communicated cells in \a props are packed into \a m_send_buf. - */ -template -void mpcd::CellCommunicator::packBufferGPU(const GPUArray& props, const PackOpT op) - { - ArrayHandle d_props(props, access_location::device, access_mode::read); - ArrayHandle d_send_idx(m_send_idx, access_location::device, access_mode::read); - ArrayHandle d_send_buf(m_send_buf, - access_location::device, - access_mode::overwrite); - typename PackOpT::element* send_buf - = reinterpret_cast(d_send_buf.data); - - m_tuner_pack->begin(); - mpcd::gpu::pack_cell_buffer(send_buf, - d_props.data, - d_send_idx.data, - op, - (unsigned int)m_send_idx.getNumElements(), - m_tuner_pack->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_tuner_pack->end(); - } - -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the received buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator must be associative so that the order it is applied - * to the received cells does not matter, e.g., addition. - * - * \post The bytes from \a m_recv_buf are unpacked into \a props. - */ -template -void mpcd::CellCommunicator::unpackBufferGPU(const GPUArray& props, const PackOpT op) - { - ArrayHandle d_recv(m_recv, access_location::device, access_mode::read); - ArrayHandle d_cells(m_cells, access_location::device, access_mode::read); - ArrayHandle d_recv_begin(m_recv_begin, - access_location::device, - access_mode::read); - ArrayHandle d_recv_end(m_recv_end, access_location::device, access_mode::read); - - ArrayHandle d_recv_buf(m_recv_buf, access_location::device, access_mode::read); - typename PackOpT::element* recv_buf - = reinterpret_cast(d_recv_buf.data); - - ArrayHandle d_props(props, access_location::device, access_mode::readwrite); - - m_tuner_unpack->begin(); - mpcd::gpu::unpack_cell_buffer(d_props.data, - d_cells.data, - d_recv.data, - d_recv_begin.data, - d_recv_end.data, - recv_buf, - op, - m_num_cells, - m_tuner_unpack->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_tuner_unpack->end(); - } -#endif // ENABLE_HIP - - } // end namespace hoomd - -#endif // MPCD_CELL_COMMUNICATOR_H_ - -#endif // ENABLE_MPI diff --git a/hoomd/mpcd/CellList.cc b/hoomd/mpcd/CellList.cc index 58bacdd3a1..bc47e1c6cd 100644 --- a/hoomd/mpcd/CellList.cc +++ b/hoomd/mpcd/CellList.cc @@ -20,7 +20,8 @@ namespace hoomd mpcd::CellList::CellList(std::shared_ptr sysdef, Scalar cell_size, bool shift) : Compute(sysdef), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_cell_np_max(4), m_cell_np(m_exec_conf), m_cell_list(m_exec_conf), m_embed_cell_ids(m_exec_conf), - m_conditions(m_exec_conf), m_needs_compute_dim(true), m_particles_sorted(false), + m_conditions(m_exec_conf), m_cell_vel(m_exec_conf), m_cell_energy(m_exec_conf), + m_needs_compute_dim(true), m_property_sum(false), m_particles_sorted(false), m_virtual_change(false) { assert(m_mpcd_pdata); @@ -40,8 +41,6 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, Scalar cell_s m_num_extra = 0; m_cover_box = m_pdata->getBox(); #endif // ENABLE_MPI - - m_mpcd_pdata->getSortSignal().connect(this); m_mpcd_pdata->getNumVirtualSignal().connect( this); m_pdata->getParticleSortSignal().connect(this); @@ -53,7 +52,8 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, bool shift) : Compute(sysdef), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_cell_np_max(4), m_cell_np(m_exec_conf), m_cell_list(m_exec_conf), m_embed_cell_ids(m_exec_conf), - m_conditions(m_exec_conf), m_needs_compute_dim(true), m_particles_sorted(false), + m_conditions(m_exec_conf), m_cell_vel(m_exec_conf), m_cell_energy(m_exec_conf), + m_needs_compute_dim(true), m_property_sum(false), m_particles_sorted(false), m_virtual_change(false) { assert(m_mpcd_pdata); @@ -73,8 +73,6 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, m_num_extra = 0; m_cover_box = m_pdata->getBox(); #endif // ENABLE_MPI - - m_mpcd_pdata->getSortSignal().connect(this); m_mpcd_pdata->getNumVirtualSignal().connect( this); m_pdata->getParticleSortSignal().connect(this); @@ -84,7 +82,6 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, mpcd::CellList::~CellList() { m_exec_conf->msg->notice(5) << "Destroying MPCD CellList" << std::endl; - m_mpcd_pdata->getSortSignal().disconnect(this); m_mpcd_pdata->getNumVirtualSignal().disconnect( this); m_pdata->getParticleSortSignal().disconnect(this); @@ -113,6 +110,9 @@ void mpcd::CellList::compute(uint64_t timestep) m_force_compute = true; } + // ensure optional flags are up to date + updateFlags(); + if (peekCompute(timestep)) { // ensure grid is shifted @@ -151,7 +151,8 @@ void mpcd::CellList::compute(uint64_t timestep) resetConditions(); } } while (overflowed); - + m_property_sum = true; + finishComputeProperties(); // we are finished building, explicitly mark everything (rather than using shouldCompute) m_first_compute = false; m_force_compute = false; @@ -308,6 +309,8 @@ void mpcd::CellList::computeDimensions() m_global_cell_indexer = Index3D(m_global_cell_dim.x, m_global_cell_dim.y, m_global_cell_dim.z); m_cell_indexer = Index3D(m_cell_dim.x, m_cell_dim.y, m_cell_dim.z); m_cell_np.resize(m_cell_indexer.getNumElements()); + m_cell_vel.resize(m_cell_indexer.getNumElements()); + m_cell_energy.resize(m_cell_indexer.getNumElements()); // reallocate per-cell memory reallocate(); @@ -317,6 +320,21 @@ void mpcd::CellList::computeDimensions() notifySizeChange(); } +void mpcd::CellList::startAutotuning() + { + Compute::startAutotuning(); +#ifdef ENABLE_MPI +#endif // ENABLE_MPI + } + +bool mpcd::CellList::isAutotuningComplete() + { + bool result = Compute::isAutotuningComplete(); +#ifdef ENABLE_MPI +#endif // ENABLE_MPI + return result; + } + #ifdef ENABLE_MPI /*! * \param dir Direction of communication @@ -356,8 +374,16 @@ void mpcd::CellList::buildCellList() access_location::host, access_mode::overwrite); ArrayHandle h_cell_np(m_cell_np, access_location::host, access_mode::overwrite); + ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::overwrite); + ArrayHandle h_cell_energy(m_cell_energy, + access_location::host, + access_mode::overwrite); + // zero the cell counter m_cell_np.zeroFill(); + m_cell_vel.zeroFill(); + m_cell_energy.zeroFill(); + m_property_sum = false; uint3 conditions = make_uint3(0, 0, 0); @@ -373,6 +399,7 @@ void mpcd::CellList::buildCellList() // we can't modify the velocity of embedded particles, so we only read their position std::unique_ptr> h_embed_cell_ids; std::unique_ptr> h_pos_embed; + std::unique_ptr> h_vel_embed; std::unique_ptr> h_embed_member_idx; if (m_embed_group) { @@ -382,6 +409,9 @@ void mpcd::CellList::buildCellList() h_pos_embed.reset(new ArrayHandle(m_pdata->getPositions(), access_location::host, access_mode::read)); + h_vel_embed.reset(new ArrayHandle(m_pdata->getVelocities(), + access_location::host, + access_mode::read)); h_embed_member_idx.reset(new ArrayHandle(m_embed_group->getIndexArray(), access_location::host, access_mode::read)); @@ -405,15 +435,22 @@ void mpcd::CellList::buildCellList() for (unsigned int cur_p = 0; cur_p < N_tot; ++cur_p) { Scalar4 postype_i; + Scalar4 vel_mass_i; + double mass_i; if (cur_p < N_mpcd) { postype_i = h_pos.data[cur_p]; + vel_mass_i = h_vel.data[cur_p]; + mass_i = 1; } else { postype_i = h_pos_embed->data[h_embed_member_idx->data[cur_p - N_mpcd]]; + vel_mass_i = h_vel_embed->data[h_embed_member_idx->data[cur_p - N_mpcd]]; + mass_i = vel_mass_i.w; } - Scalar3 pos_i = make_scalar3(postype_i.x, postype_i.y, postype_i.z); + const Scalar3 pos_i = make_scalar3(postype_i.x, postype_i.y, postype_i.z); + const double3 vel_i = make_double3(vel_mass_i.x, vel_mass_i.y, vel_mass_i.z); if (std::isnan(pos_i.x) || std::isnan(pos_i.y) || std::isnan(pos_i.z)) { @@ -509,6 +546,22 @@ void mpcd::CellList::buildCellList() h_embed_cell_ids->data[cur_p - N_mpcd] = bin_idx; } + // compute the contribution of the particle to cell velocity + double4 cell_vel = h_cell_vel.data[bin_idx]; + cell_vel.x += mass_i * vel_i.x; + cell_vel.y += mass_i * vel_i.y; + cell_vel.z += mass_i * vel_i.z; + cell_vel.w += mass_i; + h_cell_vel.data[bin_idx] = make_double4(cell_vel.x, cell_vel.y, cell_vel.z, cell_vel.w); + // compute optional cell properties + if (m_flags[mpcd::detail::thermo_options::energy]) + { + double3 cell_energy = h_cell_energy.data[bin_idx]; + cell_energy.x + += 0.5 * mass_i * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z); + cell_energy.z += 1; + h_cell_energy.data[bin_idx] = make_double3(cell_energy.x, 0.0, cell_energy.z); + } // increment the counter always ++h_cell_np.data[bin_idx]; } @@ -517,51 +570,55 @@ void mpcd::CellList::buildCellList() m_conditions.resetFlags(conditions); } -/*! - * \param timestep Timestep that the sorting occurred - * \param order Mapping of sorted particle indexes onto old particle indexes - * \param rorder Mapping of old particle indexes onto sorted particle indexes - */ -void mpcd::CellList::sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder) +void mpcd::CellList::finishComputeProperties() { - // no need to do any sorting if we can still be called at the current timestep - if (peekCompute(timestep)) - return; - - // if mapping is not valid, signal that we need to force a recompute next time - // that the cell list is needed. We don't call forceCompute() directly because this - // always runs compute(), and we just want to defer to the next compute() call. - if (rorder.isNull()) + if (!m_property_sum) { - m_force_compute = true; return; } + ArrayHandle h_cell_np(m_cell_np, access_location::host, access_mode::overwrite); + ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::readwrite); + ArrayHandle h_cell_energy(m_cell_energy, + access_location::host, + access_mode::readwrite); - // iterate through particles in cell list, and update their indexes using reverse - // mapping - ArrayHandle h_rorder(rorder, access_location::host, access_mode::read); - ArrayHandle h_cell_np(m_cell_np, access_location::host, access_mode::read); - ArrayHandle h_cell_list(m_cell_list, - access_location::host, - access_mode::readwrite); - const unsigned int N_mpcd = m_mpcd_pdata->getN(); - + // iterate over all cells and compute average velocity, energy, temperature + const bool need_energy = m_flags[mpcd::detail::thermo_options::energy]; for (unsigned int idx = 0; idx < getNCells(); ++idx) { - const unsigned int np = h_cell_np.data[idx]; - for (unsigned int offset = 0; offset < np; ++offset) + // average cell properties if the cell has mass + const double4 cell_vel = h_cell_vel.data[idx]; + double3 vel_cm = make_double3(cell_vel.x, cell_vel.y, cell_vel.z); + const double mass = cell_vel.w; + + if (mass > 0.) { - const unsigned int cl_idx = m_cell_list_indexer(offset, idx); - const unsigned int pid = h_cell_list.data[cl_idx]; - // only update indexes of MPCD particles, not virtual or embedded particles - if (pid < N_mpcd) + // average velocity is only defined when there is some mass in the cell + vel_cm.x /= mass; + vel_cm.y /= mass; + vel_cm.z /= mass; + } + h_cell_vel.data[idx] = make_double4(vel_cm.x, vel_cm.y, vel_cm.z, mass); + + if (need_energy) + { + const double3 cell_energy = h_cell_energy.data[idx]; + const double ke = cell_energy.x; + double temp(0.0); + const unsigned int np = h_cell_np.data[idx]; + // temperature is only defined for 2 or more particles + if (np > 1) { - h_cell_list.data[cl_idx] = h_rorder.data[pid]; + const double ke_cm + = 0.5 * mass + * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z); + temp = 2. * (ke - ke_cm) / (m_sysdef->getNDimensions() * (np - 1)); } + h_cell_energy.data[idx] = make_double3(ke, temp, __int_as_double(np)); } } + // ensure that properties will not be normalized twice + m_property_sum = false; } #ifdef ENABLE_MPI @@ -609,6 +666,18 @@ bool mpcd::CellList::needsEmbedMigrate(uint64_t timestep) } #endif // ENABLE_MPI +void mpcd::CellList::updateFlags() + { + mpcd::detail::ThermoFlags flags; + + if (!m_flag_signal.empty()) + { + m_flag_signal.emit_accumulate([&](mpcd::detail::ThermoFlags f) { flags |= f; }); + } + + m_flags = flags; + } + bool mpcd::CellList::checkConditions() { bool result = false; diff --git a/hoomd/mpcd/CellList.h b/hoomd/mpcd/CellList.h index f63c37d40f..8b6e6e66c1 100644 --- a/hoomd/mpcd/CellList.h +++ b/hoomd/mpcd/CellList.h @@ -13,12 +13,14 @@ #error This header cannot be compiled by nvcc #endif +#include "CellThermoTypes.h" #include "CommunicatorUtilities.h" #include "ParticleData.h" #include "hoomd/Compute.h" #include "hoomd/GPUFlags.h" #include "hoomd/ParticleGroup.h" +#include "hoomd/SystemDefinition.h" #include "hoomd/extern/nano-signal-slot/nano_signal_slot.hpp" #include @@ -48,6 +50,12 @@ class PYBIND11_EXPORT CellList : public Compute //! Sizes the cell list based on the box void computeDimensions(); + //! Start autotuning kernel launch parameters + void startAutotuning() override; + + //! Check if kernel autotuning is complete + bool isAutotuningComplete() override; + //! Get the cell list data const GPUArray& getCellList() const { @@ -60,6 +68,18 @@ class PYBIND11_EXPORT CellList : public Compute return m_cell_np; } + //! Get the cell velocities from the last call to compute + const GPUArray& getCellVelocities() const + { + return m_cell_vel; + } + + //! Get the cell energies from the last call to compute + const GPUArray& getCellEnergies() const + { + return m_cell_energy; + } + //! Get the total number of cells in the list const unsigned int getNCells() const { @@ -243,6 +263,23 @@ class PYBIND11_EXPORT CellList : public Compute return m_dim_signal; } + //! Get the signal for requested thermo flags + /*! + * \returns A signal that subscribers can attach a callback to in order + * to request certain data. + * + * For performance reasons, the CellThermoCompute should be able to + * supply many related cell-level quantities from a single kernel launch. + * However, sometimes these quantities are not needed, and it is better + * to skip calculating them. Subscribing classes can optionally request + * some of these quantities via a callback return mpcd::detail::ThermoFlags + * with the appropriate bits set. + */ + Nano::Signal& getFlagsSignal() + { + return m_flag_signal; + } + protected: std::shared_ptr m_mpcd_pdata; //!< MPCD particle data std::shared_ptr m_embed_group; //!< Embedded particles @@ -268,6 +305,14 @@ class PYBIND11_EXPORT CellList : public Compute int3 m_origin_idx; //!< Origin as a global index + GPUVector m_cell_vel; //!< Average velocity of a cell + cell mass + GPUVector m_cell_energy; //!< Kinetic energy, unscaled temperature, dof in each cell + + Nano::Signal m_flag_signal; //!< Signal for requested flags + mpcd::detail::ThermoFlags m_flags; //!< Requested thermo flags + //! Updates the requested optional flags + void updateFlags(); + #ifdef ENABLE_MPI unsigned int m_num_extra; //!< Number of extra cells to communicate over std::array m_num_comm; //!< Number of cells to communicate on each face @@ -286,13 +331,12 @@ class PYBIND11_EXPORT CellList : public Compute //! Builds the cell list and handles cell list memory virtual void buildCellList(); - //! Callback to sort cell list when particle data is sorted - virtual void sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder); + //! Do final cell property calculation + void finishComputeProperties(); private: bool m_needs_compute_dim; //!< True if the dimensions need to be (re-)computed + bool m_property_sum; //!< True if all contributions to cell properties have been accumulated //! Slot for box resizing void slotBoxChanged() { diff --git a/hoomd/mpcd/CellListGPU.cc b/hoomd/mpcd/CellListGPU.cc index ce51583a09..42ae5579ba 100644 --- a/hoomd/mpcd/CellListGPU.cc +++ b/hoomd/mpcd/CellListGPU.cc @@ -19,10 +19,7 @@ mpcd::CellListGPU::CellListGPU(std::shared_ptr sysdef, m_tuner_cell.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, m_exec_conf, "mpcd_cell")); - m_tuner_sort.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_sort")); - m_autotuners.insert(m_autotuners.end(), {m_tuner_cell, m_tuner_sort}); + m_autotuners.insert(m_autotuners.end(), {m_tuner_cell}); #ifdef ENABLE_MPI m_tuner_embed_migrate.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, @@ -43,10 +40,7 @@ mpcd::CellListGPU::CellListGPU(std::shared_ptr sysdef, m_tuner_cell.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, m_exec_conf, "mpcd_cell")); - m_tuner_sort.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_sort")); - m_autotuners.insert(m_autotuners.end(), {m_tuner_cell, m_tuner_sort}); + m_autotuners.insert(m_autotuners.end(), {m_tuner_cell}); #ifdef ENABLE_MPI m_tuner_embed_migrate.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, @@ -67,6 +61,10 @@ void mpcd::CellListGPU::buildCellList() access_location::device, access_mode::overwrite); ArrayHandle d_cell_np(m_cell_np, access_location::device, access_mode::overwrite); + ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::overwrite); + ArrayHandle d_cell_energy(m_cell_energy, + access_location::device, + access_mode::overwrite); ArrayHandle d_pos(m_mpcd_pdata->getPositions(), access_location::device, access_mode::read); @@ -97,6 +95,9 @@ void mpcd::CellListGPU::buildCellList() ArrayHandle d_pos_embed(m_pdata->getPositions(), access_location::device, access_mode::read); + ArrayHandle d_vel_embed(m_pdata->getVelocities(), + access_location::device, + access_mode::read); ArrayHandle d_embed_member_idx(m_embed_group->getIndexArray(), access_location::device, access_mode::read); @@ -104,12 +105,15 @@ void mpcd::CellListGPU::buildCellList() m_tuner_cell->begin(); mpcd::gpu::compute_cell_list(d_cell_np.data, + d_cell_vel.data, + d_cell_energy.data, d_cell_list.data, m_conditions.getDeviceFlags(), d_vel.data, d_embed_cell_ids.data, d_pos.data, d_pos_embed.data, + d_vel_embed.data, d_embed_member_idx.data, m_pdata->getBox().getPeriodic(), m_origin_idx, @@ -122,6 +126,7 @@ void mpcd::CellListGPU::buildCellList() m_cell_list_indexer, N_mpcd, N_tot, + m_flags[mpcd::detail::thermo_options::energy], m_tuner_cell->getParam()[0]); if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR(); @@ -131,6 +136,8 @@ void mpcd::CellListGPU::buildCellList() { m_tuner_cell->begin(); mpcd::gpu::compute_cell_list(d_cell_np.data, + d_cell_vel.data, + d_cell_energy.data, d_cell_list.data, m_conditions.getDeviceFlags(), d_vel.data, @@ -138,6 +145,7 @@ void mpcd::CellListGPU::buildCellList() d_pos.data, NULL, NULL, + NULL, m_pdata->getBox().getPeriodic(), m_origin_idx, m_grid_shift, @@ -149,6 +157,7 @@ void mpcd::CellListGPU::buildCellList() m_cell_list_indexer, N_mpcd, N_tot, + m_flags[mpcd::detail::thermo_options::energy], m_tuner_cell->getParam()[0]); if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR(); @@ -156,45 +165,6 @@ void mpcd::CellListGPU::buildCellList() } } -/*! - * \param timestep Timestep that the sorting occurred - * \param order Mapping of sorted particle indexes onto old particle indexes - * \param rorder Mapping of old particle indexes onto sorted particle indexes - */ -void mpcd::CellListGPU::sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder) - { - // no need to do any sorting if we can still be called at the current timestep - if (peekCompute(timestep)) - return; - - // force a recompute if mapping is invalid - if (rorder.isNull()) - { - m_force_compute = true; - return; - } - - // iterate through particles in cell list, and update their indexes using reverse mapping - ArrayHandle d_cell_list(m_cell_list, - access_location::device, - access_mode::readwrite); - ArrayHandle d_rorder(rorder, access_location::device, access_mode::read); - ArrayHandle d_cell_np(m_cell_np, access_location::device, access_mode::read); - - m_tuner_sort->begin(); - mpcd::gpu::cell_apply_sort(d_cell_list.data, - d_rorder.data, - d_cell_np.data, - m_cell_list_indexer, - m_mpcd_pdata->getN(), - m_tuner_sort->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_tuner_sort->end(); - } - #ifdef ENABLE_MPI bool mpcd::CellListGPU::needsEmbedMigrate(uint64_t timestep) { diff --git a/hoomd/mpcd/CellListGPU.cu b/hoomd/mpcd/CellListGPU.cu index 40435b7946..7e3feb8376 100644 --- a/hoomd/mpcd/CellListGPU.cu +++ b/hoomd/mpcd/CellListGPU.cu @@ -19,12 +19,15 @@ namespace kernel //! Kernel to compute the MPCD cell list on the GPU /*! * \param d_cell_np Array of number of particles per cell + * \param d_cell_vel Array of center of mass velocity + cell mass per cell + * \param d_cell_energy Array of per cell kinetic energy, temperature, and dof * \param d_cell_list 2D array of MPCD particles in each cell * \param d_conditions Conditions flags for error reporting * \param d_vel MPCD particle velocities * \param d_embed_cell_ids Cell indexes of embedded particles * \param d_pos MPCD particle positions * \param d_pos_embed Particle positions + * \param d_vel_embed Particle velocities * \param d_embed_member_idx Indexes of embedded particles in \a d_pos_embed * \param periodic Flags if local simulation is periodic * \param origin_idx Global origin index for the local box @@ -37,21 +40,26 @@ namespace kernel * \param cell_list_indexer 2D indexer for particle position in cell * \param N_mpcd Number of MPCD particles * \param N_tot Total number of particle (MPCD + embedded) + * \param need_energy True if computing the energy * * \b Implementation * One thread is launched per particle. The particle is floored into a bin subject to a random grid - * shift. The number of particles in that bin is atomically incremented. If the addition of the + * shift. The number of particles in that bin is atomically incremented and the contribution of the + * particle's properties to the cell's properties is added to a running sum. If the addition of the * particle will not overflow the allocated memory, the particle is written into that bin. * Otherwise, a flag is set to resize the cell list and recompute. The MPCD particle's cell id is * stashed into the velocity array. */ __global__ void compute_cell_list(unsigned int* d_cell_np, + double4* d_cell_vel, + double3* d_cell_energy, unsigned int* d_cell_list, uint3* d_conditions, Scalar4* d_vel, unsigned int* d_embed_cell_ids, const Scalar4* d_pos, const Scalar4* d_pos_embed, + const Scalar4* d_vel_embed, const unsigned int* d_embed_member_idx, const uchar3 periodic, const int3 origin_idx, @@ -63,7 +71,8 @@ __global__ void compute_cell_list(unsigned int* d_cell_np, const Index3D cell_indexer, const Index2D cell_list_indexer, const unsigned int N_mpcd, - const unsigned int N_tot) + const unsigned int N_tot, + const bool need_energy) { // one thread per particle unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -71,15 +80,22 @@ __global__ void compute_cell_list(unsigned int* d_cell_np, return; Scalar4 postype_i; + Scalar4 vel_mass_i; + double mass_i; if (idx < N_mpcd) { postype_i = d_pos[idx]; + vel_mass_i = d_vel[idx]; + mass_i = 1.0; } else { postype_i = d_pos_embed[d_embed_member_idx[idx - N_mpcd]]; + vel_mass_i = d_vel_embed[d_embed_member_idx[idx - N_mpcd]]; + mass_i = vel_mass_i.w; } const Scalar3 pos_i = make_scalar3(postype_i.x, postype_i.y, postype_i.z); + const double3 vel_i = make_double3(vel_mass_i.x, vel_mass_i.y, vel_mass_i.z); if (isnan(pos_i.x) || isnan(pos_i.y) || isnan(pos_i.z)) { @@ -175,6 +191,19 @@ __global__ void compute_cell_list(unsigned int* d_cell_np, { d_embed_cell_ids[idx - N_mpcd] = bin_idx; } + + // compute the contribution of the particle to cell properties + atomicAdd(&d_cell_vel[bin_idx].x, vel_i.x * mass_i); + atomicAdd(&d_cell_vel[bin_idx].y, vel_i.y * mass_i); + atomicAdd(&d_cell_vel[bin_idx].z, vel_i.z * mass_i); + atomicAdd(&d_cell_vel[bin_idx].w, mass_i); + + if (need_energy) + { + double ke = 0.5 * mass_i * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z); + atomicAdd(&d_cell_energy[bin_idx].x, ke); + atomicAdd(&d_cell_energy[bin_idx].z, __int_as_double(1)); + } } /*! @@ -218,47 +247,21 @@ __global__ void cell_check_migrate_embed(unsigned int* d_migrate_flag, } } -__global__ void cell_apply_sort(unsigned int* d_cell_list, - const unsigned int* d_rorder, - const unsigned int* d_cell_np, - const Index2D cli, - const unsigned int N_mpcd, - const unsigned int N_cli) - { - // one thread per cell-list entry - const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= N_cli) - return; - - // convert the entry 1D index into a 2D index - const unsigned int cell = idx / cli.getW(); - const unsigned int offset = idx - (cell * cli.getW()); - - /* here comes some terrible execution divergence */ - // check if the cell is filled - const unsigned int np = d_cell_np[cell]; - if (offset < np) - { - // check if this is an MPCD particle - const unsigned int pid = d_cell_list[idx]; - if (pid < N_mpcd) - { - d_cell_list[idx] = d_rorder[pid]; - } - } - } } // end namespace kernel } // end namespace gpu } // end namespace mpcd /*! * \param d_cell_np Array of number of particles per cell + * \param d_cell_vel Array of center of mass velocity + cell mass per cell + * \param d_cell_energy Array of per cell kinetic energy, temperature, and dof * \param d_cell_list 2D array of MPCD particles in each cell * \param d_conditions Conditions flags for error reporting * \param d_vel MPCD particle velocities * \param d_embed_cell_ids Cell indexes of embedded particles * \param d_pos MPCD particle positions * \param d_pos_embed Particle positions + * \param d_vel_embed Particle velocities * \param d_embed_member_idx Indexes of embedded particles in \a d_pos_embed * \param periodic Flags if local simulation is periodic * \param origin_idx Global origin index for the local box @@ -271,17 +274,21 @@ __global__ void cell_apply_sort(unsigned int* d_cell_list, * \param cell_list_indexer 2D indexer for particle position in cell * \param N_mpcd Number of MPCD particles * \param N_tot Total number of particle (MPCD + embedded) + * \param need_energy True if computing the energy * \param block_size Number of threads per block * * \returns cudaSuccess on completion, or an error on failure */ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, + double4* d_cell_vel, + double3* d_cell_energy, unsigned int* d_cell_list, uint3* d_conditions, Scalar4* d_vel, unsigned int* d_embed_cell_ids, const Scalar4* d_pos, const Scalar4* d_pos_embed, + const Scalar4* d_vel_embed, const unsigned int* d_embed_member_idx, const uchar3& periodic, const int3& origin_idx, @@ -294,6 +301,7 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, const Index2D& cell_list_indexer, const unsigned int N_mpcd, const unsigned int N_tot, + const bool need_energy, const unsigned int block_size) { // set the number of particles in each cell to zero @@ -301,7 +309,14 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, = cudaMemset(d_cell_np, 0, sizeof(unsigned int) * cell_indexer.getNumElements()); if (error != cudaSuccess) return error; - + cudaError_t error_vel + = cudaMemset(d_cell_vel, 0, sizeof(double3) * cell_indexer.getNumElements()); + if (error_vel != cudaSuccess) + return error_vel; + cudaError_t error_energy + = cudaMemset(d_cell_energy, 0, sizeof(double3) * cell_indexer.getNumElements()); + if (error_energy != cudaSuccess) + return error_energy; unsigned int max_block_size; cudaFuncAttributes attr; cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::compute_cell_list); @@ -310,12 +325,15 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, unsigned int run_block_size = min(block_size, max_block_size); dim3 grid(N_tot / run_block_size + 1); mpcd::gpu::kernel::compute_cell_list<<>>(d_cell_np, + d_cell_vel, + d_cell_energy, d_cell_list, d_conditions, d_vel, d_embed_cell_ids, d_pos, d_pos_embed, + d_vel_embed, d_embed_member_idx, periodic, origin_idx, @@ -327,7 +345,8 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, cell_indexer, cell_list_indexer, N_mpcd, - N_tot); + N_tot, + need_energy); return cudaSuccess; } @@ -370,30 +389,4 @@ cudaError_t mpcd::gpu::cell_check_migrate_embed(unsigned int* d_migrate_flag, return cudaSuccess; } -cudaError_t mpcd::gpu::cell_apply_sort(unsigned int* d_cell_list, - const unsigned int* d_rorder, - const unsigned int* d_cell_np, - const Index2D& cli, - const unsigned int N_mpcd, - const unsigned int block_size) - { - unsigned int max_block_size; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::cell_apply_sort); - max_block_size = attr.maxThreadsPerBlock; - - const unsigned int N_cli = cli.getNumElements(); - - unsigned int run_block_size = min(block_size, max_block_size); - dim3 grid(N_cli / run_block_size + 1); - mpcd::gpu::kernel::cell_apply_sort<<>>(d_cell_list, - d_rorder, - d_cell_np, - cli, - N_mpcd, - N_cli); - - return cudaSuccess; - } - } // end namespace hoomd diff --git a/hoomd/mpcd/CellListGPU.cuh b/hoomd/mpcd/CellListGPU.cuh index e74440ba81..39287c3a64 100644 --- a/hoomd/mpcd/CellListGPU.cuh +++ b/hoomd/mpcd/CellListGPU.cuh @@ -23,12 +23,15 @@ namespace gpu { //! Kernel driver to compute mpcd cell list cudaError_t compute_cell_list(unsigned int* d_cell_np, + double4* d_cell_vel, + double3* d_cell_energy, unsigned int* d_cell_list, uint3* d_conditions, Scalar4* d_vel, unsigned int* d_embed_cell_ids, const Scalar4* d_pos, const Scalar4* d_pos_embed, + const Scalar4* d_vel_embed, const unsigned int* d_embed_member_idx, const uchar3& periodic, const int3& origin_idx, @@ -41,6 +44,7 @@ cudaError_t compute_cell_list(unsigned int* d_cell_np, const Index2D& cell_list_indexer, const unsigned int N_mpcd, const unsigned int N_tot, + const bool need_energy, const unsigned int block_size); //! Kernel driver to check if any embedded particles require migration @@ -52,14 +56,6 @@ cudaError_t cell_check_migrate_embed(unsigned int* d_migrate_flag, const unsigned int N, const unsigned int block_size); -//! Kernel drive to apply sorted order to MPCD particles in cell list -cudaError_t cell_apply_sort(unsigned int* d_cell_list, - const unsigned int* d_rorder, - const unsigned int* d_cell_np, - const Index2D& cli, - const unsigned int N_mpcd, - const unsigned int block_size); - } // end namespace gpu } // end namespace mpcd } // end namespace hoomd diff --git a/hoomd/mpcd/CellListGPU.h b/hoomd/mpcd/CellListGPU.h index f9f78f8779..c58ecce123 100644 --- a/hoomd/mpcd/CellListGPU.h +++ b/hoomd/mpcd/CellListGPU.h @@ -36,11 +36,6 @@ class PYBIND11_EXPORT CellListGPU : public mpcd::CellList //! Compute the cell list of particles on the GPU void buildCellList() override; - //! Callback to sort cell list on the GPU when particle data is sorted - virtual void sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder); - #ifdef ENABLE_MPI //! Determine if embedded particles require migration on the gpu virtual bool needsEmbedMigrate(uint64_t timestep); @@ -51,8 +46,6 @@ class PYBIND11_EXPORT CellListGPU : public mpcd::CellList /// Autotuner for the cell list calculation. std::shared_ptr> m_tuner_cell; - /// Autotuner for sorting the cell list. - std::shared_ptr> m_tuner_sort; #ifdef ENABLE_MPI /// Autotuner for checking embedded migration. std::shared_ptr> m_tuner_embed_migrate; diff --git a/hoomd/mpcd/CollisionMethod.cc b/hoomd/mpcd/CollisionMethod.cc index b7d59b66f7..e1fe10c9ef 100644 --- a/hoomd/mpcd/CollisionMethod.cc +++ b/hoomd/mpcd/CollisionMethod.cc @@ -99,12 +99,6 @@ void mpcd::CollisionMethod::collide(uint64_t timestep) } #endif // ENABLE_HIP - // set random grid shift - m_cl->drawGridShift(timestep); - - // update cell list - m_cl->compute(timestep); - // setup rigid bodies before collision happens const bool rigid_body_collision = m_embed_group && m_rigid_bodies; if (rigid_body_collision) @@ -143,6 +137,12 @@ void mpcd::CollisionMethod::collide(uint64_t timestep) } } + // set random grid shift + m_cl->drawGridShift(timestep); + + // update cell list + m_cl->compute(timestep); + // apply collisions rule(timestep); diff --git a/hoomd/mpcd/SRDCollisionMethod.cc b/hoomd/mpcd/SRDCollisionMethod.cc index c69b8ed16e..fc9d32679c 100644 --- a/hoomd/mpcd/SRDCollisionMethod.cc +++ b/hoomd/mpcd/SRDCollisionMethod.cc @@ -32,22 +32,20 @@ mpcd::SRDCollisionMethod::~SRDCollisionMethod() void mpcd::SRDCollisionMethod::startAutotuning() { mpcd::CollisionMethod::startAutotuning(); - if (m_thermo) - m_thermo->startAutotuning(); + if (m_cl) + m_cl->startAutotuning(); } bool mpcd::SRDCollisionMethod::isAutotuningComplete() { bool result = mpcd::CollisionMethod::isAutotuningComplete(); - if (m_thermo) - result = result && m_thermo->isAutotuningComplete(); + if (m_cl) + result = result && m_cl->isAutotuningComplete(); return result; } void mpcd::SRDCollisionMethod::rule(uint64_t timestep) { - m_thermo->compute(timestep); - // resize the rotation vectors and rescale factors m_rotvec.resize(m_cl->getNCells()); if (m_T) @@ -78,7 +76,7 @@ void mpcd::SRDCollisionMethod::drawRotationVectors(uint64_t timestep) { h_factors.reset( new ArrayHandle(m_factors, access_location::host, access_mode::overwrite)); - h_cell_energy.reset(new ArrayHandle(m_thermo->getCellEnergies(), + h_cell_energy.reset(new ArrayHandle(m_cl->getCellEnergies(), access_location::host, access_mode::read)); T_set = (*m_T)(timestep); @@ -162,7 +160,7 @@ void mpcd::SRDCollisionMethod::rotate(uint64_t timestep) } // acquire cell velocities - ArrayHandle h_cell_vel(m_thermo->getCellVelocities(), + ArrayHandle h_cell_vel(m_cl->getCellVelocities(), access_location::host, access_mode::read); @@ -260,33 +258,28 @@ void mpcd::SRDCollisionMethod::setCellList(std::shared_ptr cl) { if (cl != m_cl) { - CollisionMethod::setCellList(cl); detachCallbacks(); + CollisionMethod::setCellList(cl); if (m_cl) { - m_thermo = std::make_shared(m_sysdef, m_cl); attachCallbacks(); } - else - { - m_thermo = std::shared_ptr(); - } } } void mpcd::SRDCollisionMethod::attachCallbacks() { - assert(m_thermo); - m_thermo->getFlagsSignal() + assert(m_cl); + m_cl->getFlagsSignal() .connect( this); } void mpcd::SRDCollisionMethod::detachCallbacks() { - if (m_thermo) + if (m_cl) { - m_thermo->getFlagsSignal() + m_cl->getFlagsSignal() .disconnect(this); } diff --git a/hoomd/mpcd/SRDCollisionMethod.h b/hoomd/mpcd/SRDCollisionMethod.h index d4732269dd..2d7c09b84e 100644 --- a/hoomd/mpcd/SRDCollisionMethod.h +++ b/hoomd/mpcd/SRDCollisionMethod.h @@ -13,7 +13,6 @@ #error This header cannot be compiled by nvcc #endif -#include "CellThermoCompute.h" #include "CollisionMethod.h" #include "hoomd/Variant.h" @@ -81,9 +80,8 @@ class PYBIND11_EXPORT SRDCollisionMethod : public mpcd::CollisionMethod } protected: - std::shared_ptr m_thermo; //!< Cell thermo - GPUVector m_rotvec; //!< MPCD rotation vectors - Scalar m_angle; //!< MPCD rotation angle (degrees) + GPUVector m_rotvec; //!< MPCD rotation vectors + Scalar m_angle; //!< MPCD rotation angle (degrees) GPUVector m_factors; //!< Cell-level rescale factors diff --git a/hoomd/mpcd/SRDCollisionMethodGPU.cc b/hoomd/mpcd/SRDCollisionMethodGPU.cc index bb13dd00bb..169f4a10ae 100644 --- a/hoomd/mpcd/SRDCollisionMethodGPU.cc +++ b/hoomd/mpcd/SRDCollisionMethodGPU.cc @@ -37,7 +37,7 @@ void mpcd::SRDCollisionMethodGPU::drawRotationVectors(uint64_t timestep) if (m_T) { ArrayHandle d_factors(m_factors, access_location::device, access_mode::overwrite); - ArrayHandle d_cell_energy(m_thermo->getCellEnergies(), + ArrayHandle d_cell_energy(m_cl->getCellEnergies(), access_location::device, access_mode::read); @@ -89,7 +89,7 @@ void mpcd::SRDCollisionMethodGPU::rotate(uint64_t timestep) unsigned int N_tot = N_mpcd; // acquire cell velocities and rotation vectors - ArrayHandle d_cell_vel(m_thermo->getCellVelocities(), + ArrayHandle d_cell_vel(m_cl->getCellVelocities(), access_location::device, access_mode::read); ArrayHandle d_rotvec(m_rotvec, access_location::device, access_mode::read); @@ -162,13 +162,8 @@ void mpcd::SRDCollisionMethodGPU::setCellList(std::shared_ptr cl detachCallbacks(); if (m_cl) { - m_thermo = std::make_shared(m_sysdef, m_cl); attachCallbacks(); } - else - { - m_thermo = std::shared_ptr(); - } } } diff --git a/hoomd/mpcd/Sorter.cc b/hoomd/mpcd/Sorter.cc index cc178c372a..69089413ce 100644 --- a/hoomd/mpcd/Sorter.cc +++ b/hoomd/mpcd/Sorter.cc @@ -28,7 +28,7 @@ mpcd::Sorter::~Sorter() /*! * \param timestep Current simulation timestep * - * This method is just a driver for the computeOrder() and applyOrder() methods. + * This method is just a driver for the computeOrder() and applySortOrder() methods. */ void mpcd::Sorter::update(uint64_t timestep) { @@ -43,7 +43,7 @@ void mpcd::Sorter::update(uint64_t timestep) // generate and apply the sorted order computeOrder(timestep); - applyOrder(); + applySortOrder(); // trigger the sort signal for ParticleData callbacks using the current sortings m_mpcd_pdata->notifySort(timestep, m_order, m_rorder); @@ -100,7 +100,7 @@ void mpcd::Sorter::computeOrder(uint64_t timestep) * arrays. The communication flags are \b not sorted in MPI because by design, * the caller is responsible for clearing out any old flags before using them. */ -void mpcd::Sorter::applyOrder() const +void mpcd::Sorter::applySortOrder() const { // apply the sorted order { diff --git a/hoomd/mpcd/Sorter.h b/hoomd/mpcd/Sorter.h index 3a308cca0f..12f81739b0 100644 --- a/hoomd/mpcd/Sorter.h +++ b/hoomd/mpcd/Sorter.h @@ -28,7 +28,7 @@ namespace mpcd /*! * The Sorter puts MPCD particles into an order that is more cache-friendly. * The natural way to do this for the algorithm is cell list order. The base - * Sorter implements applyOrder() for applying the reordering map to the + * Sorter implements applySortOrder() for applying the reordering map to the * mpcd::ParticleData. Specific sorting algorithms can be implemented by * deriving from Sorter and implementing computeOrder(). Any computeOrder() * must set the map from old particle index to new particle index, and the @@ -71,7 +71,7 @@ class PYBIND11_EXPORT Sorter : public Tuner virtual void computeOrder(uint64_t timestep); //! Apply the sorting order - virtual void applyOrder() const; + virtual void applySortOrder() const; }; } // end namespace mpcd } // end namespace hoomd diff --git a/hoomd/mpcd/SorterGPU.cc b/hoomd/mpcd/SorterGPU.cc index 897b81919b..2620b507c0 100644 --- a/hoomd/mpcd/SorterGPU.cc +++ b/hoomd/mpcd/SorterGPU.cc @@ -105,7 +105,7 @@ void mpcd::SorterGPU::computeOrder(uint64_t timestep) * arrays. The communication flags are \b not sorted in MPI because by design, * the caller is responsible for clearing out any old flags before using them. */ -void mpcd::SorterGPU::applyOrder() const +void mpcd::SorterGPU::applySortOrder() const { // apply the sorted order { diff --git a/hoomd/mpcd/SorterGPU.h b/hoomd/mpcd/SorterGPU.h index 566db8aeb7..c7dd719b27 100644 --- a/hoomd/mpcd/SorterGPU.h +++ b/hoomd/mpcd/SorterGPU.h @@ -45,7 +45,7 @@ class PYBIND11_EXPORT SorterGPU : public mpcd::Sorter void computeOrder(uint64_t timestep) override; //! Apply the sorting order on the GPU - void applyOrder() const override; + void applySortOrder() const override; }; } // end namespace mpcd } // end namespace hoomd diff --git a/hoomd/mpcd/pytest/test_collide.py b/hoomd/mpcd/pytest/test_collide.py index 3bc4852f9b..42ee97f2c5 100644 --- a/hoomd/mpcd/pytest/test_collide.py +++ b/hoomd/mpcd/pytest/test_collide.py @@ -139,7 +139,7 @@ def test_run(self, small_snap, simulation_factory, cls, init_args): "angmom_rigid", [[0, 0, 0, 0], [0, 2, 3, 4]], ids=["Nonrotating", "Rotating"] ) @pytest.mark.parametrize( - "pos_rigid", [[0, 0, 0], [10, 10, 10]], ids=["center", "edge"] + "pos_rigid", [[0, 0, 0], [5, 5, 5]], ids=["center", "edge"] ) @pytest.mark.parametrize( "def_rigid,properties_rigid", @@ -216,7 +216,7 @@ def test_rigid_collide( # create simulation initial_snap = one_particle_snapshot_factory( - particle_types=["A", "B"], position=pos_rigid, L=21 + particle_types=["A", "B"], position=pos_rigid, L=11 ) total_mass = properties_rigid["mass"][0] if initial_snap.communicator.rank == 0: @@ -226,9 +226,12 @@ def test_rigid_collide( initial_snap.particles.angmom[:] = [angmom_rigid] # place the mpcd particles on top of constituents + positions = np.add(def_rigid["positions"], pos_rigid) + positions[positions < -11 * 0.5] = positions[positions < -11 * 0.5] + 11 + positions[positions > 11 * 0.5] = positions[positions > 11 * 0.5] - 11 initial_snap.mpcd.N = N_mpcd initial_snap.mpcd.types = ["C"] - initial_snap.mpcd.position[:] = def_rigid["positions"] + initial_snap.mpcd.position[:] = positions initial_snap.mpcd.velocity[:] = velo_mpcd sim = simulation_factory(initial_snap) @@ -338,7 +341,7 @@ def test_rigid_collide_free( # create simulation total_mass = properties_rigid["mass"][0] initial_snap = two_particle_snapshot_factory( - particle_types=["A", "B", "C", "D"], L=21 + particle_types=["A", "B", "C", "D"], L=11 ) if initial_snap.communicator.rank == 0: # put a free particle that doesn't participate in collision on top of