diff --git a/hoomd/mpcd/CMakeLists.txt b/hoomd/mpcd/CMakeLists.txt index 7ae9b10b8d..a0b5102004 100644 --- a/hoomd/mpcd/CMakeLists.txt +++ b/hoomd/mpcd/CMakeLists.txt @@ -5,9 +5,6 @@ endif() set(_mpcd_cc_sources module.cc - ATCollisionMethod.cc - CellCommunicator.cc - CellThermoCompute.cc CellList.cc CollisionMethod.cc Communicator.cc @@ -23,12 +20,9 @@ set(_mpcd_cc_sources ) set(_mpcd_headers - ATCollisionMethod.h BounceBackNVE.h BulkGeometry.h BulkStreamingMethod.h - CellCommunicator.h - CellThermoCompute.h CellList.h CollisionMethod.h BounceBackStreamingMethod.h @@ -70,8 +64,6 @@ set(_geometries if(ENABLE_HIP) list(APPEND _mpcd_cc_sources - ATCollisionMethodGPU.cc - CellThermoComputeGPU.cc CellListGPU.cc CommunicatorGPU.cc ParallelPlateGeometryFillerGPU.cc @@ -81,16 +73,11 @@ if(ENABLE_HIP) SRDCollisionMethodGPU.cc ) list(APPEND _mpcd_headers - ATCollisionMethodGPU.cuh - ATCollisionMethodGPU.h BounceBackNVEGPU.cuh BounceBackNVEGPU.h BounceBackStreamingMethodGPU.cuh BounceBackStreamingMethodGPU.h BulkStreamingMethodGPU.h - CellCommunicator.cuh - CellThermoComputeGPU.cuh - CellThermoComputeGPU.h CellListGPU.cuh CellListGPU.h CollisionMethod.cuh @@ -111,9 +98,7 @@ if(ENABLE_HIP) SRDCollisionMethodGPU.h ) set(_mpcd_cu_sources - ATCollisionMethodGPU.cu BounceBackNVEGPU.cu - CellThermoComputeGPU.cu CellListGPU.cu CollisionMethod.cu CommunicatorGPU.cu diff --git a/hoomd/mpcd/CellCommunicator.cc b/hoomd/mpcd/CellCommunicator.cc deleted file mode 100644 index ee52380b1c..0000000000 --- a/hoomd/mpcd/CellCommunicator.cc +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellCommunicator.cc - * \brief Definition of mpcd::CellCommunicator - */ - -#ifdef ENABLE_MPI - -#include "CellCommunicator.h" - -namespace hoomd - { -// initialize with zero instances of the communicator -unsigned int mpcd::CellCommunicator::num_instances = 0; - -/*! - * \param sysdef System definition - * \param cl MPCD cell list - */ -mpcd::CellCommunicator::CellCommunicator(std::shared_ptr sysdef, - std::shared_ptr cl) - : m_id(num_instances++), m_sysdef(sysdef), m_pdata(sysdef->getParticleData()), - m_exec_conf(m_pdata->getExecConf()), m_mpi_comm(m_exec_conf->getMPICommunicator()), - m_decomposition(m_pdata->getDomainDecomposition()), m_cl(cl), m_communicating(false), - m_send_buf(m_exec_conf), m_recv_buf(m_exec_conf), m_needs_init(true) - { - m_exec_conf->msg->notice(5) << "Constructing MPCD CellCommunicator" << std::endl; -#ifdef ENABLE_HIP - if (m_exec_conf->isCUDAEnabled()) - { - m_tuner_pack.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_comm_pack_" + std::to_string(m_id))); - m_tuner_unpack.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_comm_unpack_" + std::to_string(m_id))); - m_autotuners.insert(m_autotuners.end(), {m_tuner_pack, m_tuner_unpack}); - } -#endif // ENABLE_HIP - - m_cl->getSizeChangeSignal().connect( - this); - } - -mpcd::CellCommunicator::~CellCommunicator() - { - m_exec_conf->msg->notice(5) << "Destroying MPCD CellCommunicator" << std::endl; - m_cl->getSizeChangeSignal() - .disconnect(this); - } - -namespace mpcd - { -namespace detail - { -//! Unary operator to wrap global cell indexes into the local domain -struct LocalCellWrapOp - { - LocalCellWrapOp(std::shared_ptr cl_) - : cl(cl_), ci(cl_->getCellIndexer()), gci(cl_->getGlobalCellIndexer()) - { - } - - //! 
Transform the global 1D cell index into a local 1D cell index - inline unsigned int operator()(unsigned int cell_idx) - { - // convert the 1D global cell index to a global cell tuple - const uint3 cell = gci.getTriple(cell_idx); - - // convert the global cell tuple to a local cell tuple - int3 local_cell = cl->getLocalCell(make_int3(cell.x, cell.y, cell.z)); - - // wrap the local cell through the global boundaries, which should work for all reasonable - // cell comms. - if (local_cell.x >= (int)gci.getW()) - local_cell.x -= gci.getW(); - else if (local_cell.x < 0) - local_cell.x += gci.getW(); - - if (local_cell.y >= (int)gci.getH()) - local_cell.y -= gci.getH(); - else if (local_cell.y < 0) - local_cell.y += gci.getH(); - - if (local_cell.z >= (int)gci.getD()) - local_cell.z -= gci.getD(); - else if (local_cell.z < 0) - local_cell.z += gci.getD(); - - // convert the local cell tuple back to an index - return ci(local_cell.x, local_cell.y, local_cell.z); - } - - std::shared_ptr cl; //!< Cell list - const Index3D ci; //!< Cell indexer - const Index3D gci; //!< Global cell indexer - }; - } // end namespace detail - } // end namespace mpcd - -void mpcd::CellCommunicator::initialize() - { - // obtain domain decomposition - const Index3D& di = m_decomposition->getDomainIndexer(); - ArrayHandle h_cart_ranks(m_decomposition->getCartRanks(), - access_location::host, - access_mode::read); - const uint3 my_pos = m_decomposition->getGridPos(); - - // use the cell list to compute the bounds - const Index3D& ci = m_cl->getCellIndexer(); - const Index3D& global_ci = m_cl->getGlobalCellIndexer(); - auto num_comm_cells = m_cl->getNComm(); - const uint3 max_lo - = make_uint3(num_comm_cells[static_cast(mpcd::detail::face::west)], - num_comm_cells[static_cast(mpcd::detail::face::south)], - num_comm_cells[static_cast(mpcd::detail::face::down)]); - const uint3 min_hi = make_uint3( - ci.getW() - num_comm_cells[static_cast(mpcd::detail::face::east)], - ci.getH() - num_comm_cells[static_cast(mpcd::detail::face::north)], - ci.getD() - num_comm_cells[static_cast(mpcd::detail::face::up)]); - - // check to make sure box is not overdecomposed - { - const unsigned int nextra = m_cl->getNExtraCells(); - unsigned int err = ((max_lo.x + nextra) > min_hi.x || (max_lo.y + nextra) > min_hi.y - || (max_lo.z + nextra) > min_hi.z); - MPI_Allreduce(MPI_IN_PLACE, &err, 1, MPI_UNSIGNED, MPI_MAX, m_mpi_comm); - if (err) - { - m_exec_conf->msg->error() - << "mpcd: Simulation box is overdecomposed, decrease the number of ranks." 
- << std::endl; - throw std::runtime_error("Simulation box is overdecomposed for MPCD"); - } - } - - // loop over all cells in the grid and determine where to send them - std::multimap send_map; - std::set neighbors; - for (unsigned int k = 0; k < ci.getD(); ++k) - { - for (unsigned int j = 0; j < ci.getH(); ++j) - { - for (unsigned int i = 0; i < ci.getW(); ++i) - { - // skip any cells interior to the grid, which will not be communicated - // this is wasteful loop logic, but initialize will only be called rarely - if (i >= max_lo.x && i < min_hi.x && j >= max_lo.y && j < min_hi.y && k >= max_lo.z - && k < min_hi.z) - continue; - - // obtain the 1D global index of this cell - const int3 global_cell = m_cl->getGlobalCell(make_int3(i, j, k)); - const unsigned int global_cell_idx - = global_ci(global_cell.x, global_cell.y, global_cell.z); - - // check which direction the cell lies off rank in x,y,z - std::vector dx = {0}; - if (i < max_lo.x) - dx.push_back(-1); - else if (i >= min_hi.x) - dx.push_back(1); - - std::vector dy = {0}; - if (j < max_lo.y) - dy.push_back(-1); - else if (j >= min_hi.y) - dy.push_back(1); - - std::vector dz = {0}; - if (k < max_lo.z) - dz.push_back(-1); - else if (k >= min_hi.z) - dz.push_back(1); - - // generate all permutations of these neighbors for the cell - for (auto ddx = dx.begin(); ddx != dx.end(); ++ddx) - { - for (auto ddy = dy.begin(); ddy != dy.end(); ++ddy) - { - for (auto ddz = dz.begin(); ddz != dz.end(); ++ddz) - { - // skip self - if (*ddx == 0 && *ddy == 0 && *ddz == 0) - continue; - - // get neighbor rank tuple - int3 neigh = make_int3((int)my_pos.x + *ddx, - (int)my_pos.y + *ddy, - (int)my_pos.z + *ddz); - - // wrap neighbor through the boundaries - if (neigh.x < 0) - neigh.x += di.getW(); - else if (neigh.x >= (int)di.getW()) - neigh.x -= di.getW(); - - if (neigh.y < 0) - neigh.y += di.getH(); - else if (neigh.y >= (int)di.getH()) - neigh.y -= di.getH(); - - if (neigh.z < 0) - neigh.z += di.getD(); - else if (neigh.z >= (int)di.getD()) - neigh.z -= di.getD(); - - // convert neighbor to a linear rank and push it into the unique - // neighbor set - const unsigned int neigh_rank - = h_cart_ranks.data[di(neigh.x, neigh.y, neigh.z)]; - neighbors.insert(neigh_rank); - send_map.insert(std::make_pair(neigh_rank, global_cell_idx)); - } // ddz - } // ddy - } // ddx - } // i - } // j - } // k - - // allocate send / receive index arrays - { - GPUArray send_idx(send_map.size(), m_exec_conf); - m_send_idx.swap(send_idx); - } - - // fill the send indexes with the global values - // flood the array of unique neighbors and count the number to send - { - ArrayHandle h_send_idx(m_send_idx, - access_location::host, - access_mode::overwrite); - unsigned int idx = 0; - for (auto it = send_map.begin(); it != send_map.end(); ++it) - { - h_send_idx.data[idx++] = it->second; - } - - m_neighbors.resize(neighbors.size()); - m_begin.resize(m_neighbors.size()); - m_num_send.resize(m_neighbors.size()); - idx = 0; - for (auto it = neighbors.begin(); it != neighbors.end(); ++it) - { - auto lower = send_map.lower_bound(*it); - auto upper = send_map.upper_bound(*it); - - m_neighbors[idx] = *it; - m_begin[idx] = (unsigned int)std::distance(send_map.begin(), lower); - m_num_send[idx] = (unsigned int)std::distance(lower, upper); - ++idx; - } - } - - // send / receive the global cell indexes to be communicated with neighbors - std::vector recv_idx(m_send_idx.getNumElements()); - { - ArrayHandle h_send_idx(m_send_idx, access_location::host, access_mode::read); - - m_reqs.resize(2 * 
m_neighbors.size()); - for (unsigned int idx = 0; idx < m_neighbors.size(); ++idx) - { - const unsigned int offset = m_begin[idx]; - MPI_Isend(h_send_idx.data + offset, - m_num_send[idx], - MPI_INT, - m_neighbors[idx], - 0, - m_mpi_comm, - &m_reqs[2 * idx]); - MPI_Irecv(recv_idx.data() + offset, - m_num_send[idx], - MPI_INT, - m_neighbors[idx], - 0, - m_mpi_comm, - &m_reqs[2 * idx + 1]); - } - MPI_Waitall((unsigned int)m_reqs.size(), m_reqs.data(), MPI_STATUSES_IGNORE); - } - - // transform all of the global cell indexes back into local cell indexes - { - ArrayHandle h_send_idx(m_send_idx, - access_location::host, - access_mode::readwrite); - - mpcd::detail::LocalCellWrapOp wrapper(m_cl); - std::transform(h_send_idx.data, - h_send_idx.data + m_send_idx.getNumElements(), - h_send_idx.data, - wrapper); - std::transform(recv_idx.begin(), recv_idx.end(), recv_idx.begin(), wrapper); - } - - // map the received cells from a rank-basis to a cell-basis - { - std::multimap cell_map; - std::set unique_cells; - for (unsigned int idx = 0; idx < recv_idx.size(); ++idx) - { - const unsigned int cell = recv_idx[idx]; - unique_cells.insert(cell); - cell_map.insert(std::make_pair(cell, idx)); - } - m_num_cells = (unsigned int)unique_cells.size(); - - /* - * Allocate auxiliary memory for receiving cell reordering - */ - { - GPUArray recv(recv_idx.size(), m_exec_conf); - m_recv.swap(recv); - - GPUArray cells(m_num_cells, m_exec_conf); - m_cells.swap(cells); - - GPUArray recv_begin(m_num_cells, m_exec_conf); - m_recv_begin.swap(recv_begin); - - GPUArray recv_end(m_num_cells, m_exec_conf); - m_recv_end.swap(recv_end); - } - - /* - * Generate the compacted list of unique cells - */ - ArrayHandle h_cells(m_cells, access_location::host, access_mode::overwrite); - unsigned int idx = 0; - for (auto it = unique_cells.begin(); it != unique_cells.end(); ++it) - { - h_cells.data[idx++] = *it; - } - - /* - * Loop over the cell map to do run-length encoding on the keys. This - * determines the range of data belonging to each received cell. - */ - ArrayHandle h_recv(m_recv, access_location::host, access_mode::overwrite); - ArrayHandle h_recv_begin(m_recv_begin, - access_location::host, - access_mode::overwrite); - ArrayHandle h_recv_end(m_recv_end, - access_location::host, - access_mode::overwrite); - unsigned int last_cell = UINT_MAX; - unsigned int cell_idx = 0; - idx = 0; - h_recv_begin.data[cell_idx] = idx; - for (auto it = cell_map.begin(); it != cell_map.end(); ++it) - { - // record the sorted receive index - h_recv.data[idx] = it->second; - - // if not very first pass and the current cell does not match the - // last cell, then we are on a new cell, and need to demark an end / begin - if (last_cell != UINT_MAX && it->first != last_cell) - { - h_recv_end.data[cell_idx] = idx; - h_recv_begin.data[++cell_idx] = idx; - } - last_cell = it->first; - - ++idx; - } - h_recv_end.data[cell_idx] = idx; - } - } - } // end namespace hoomd - -#endif // ENABLE_MPI diff --git a/hoomd/mpcd/CellCommunicator.cuh b/hoomd/mpcd/CellCommunicator.cuh deleted file mode 100644 index 3bba09f5f6..0000000000 --- a/hoomd/mpcd/CellCommunicator.cuh +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! 
- * \file mpcd/CellCommunicator.cuh - * \brief Declaration of CUDA kernels for mpcd::CellCommunicator - */ - -#ifndef MPCD_CELL_COMMUNICATOR_CUH_ -#define MPCD_CELL_COMMUNICATOR_CUH_ - -#include "hoomd/HOOMDMath.h" -#include "hoomd/Index1D.h" - -#include - -namespace hoomd - { -namespace mpcd - { -namespace gpu - { -//! Kernel driver to pack cell communication buffer -template -cudaError_t __attribute__((visibility("default"))) -pack_cell_buffer(typename PackOpT::element* d_send_buf, - const T* d_props, - const unsigned int* d_send_idx, - const PackOpT op, - const unsigned int num_send, - unsigned int block_size); - -//! Kernel driver to unpack cell communication buffer -template -cudaError_t __attribute__((visibility("default"))) -unpack_cell_buffer(T* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename PackOpT::element* d_recv_buf, - const PackOpT op, - const unsigned int num_cells, - const unsigned int block_size); - -#ifdef __HIPCC__ - -namespace kernel - { -//! Kernel to pack cell communication buffer -/*! - * \param d_send_buf Send buffer to pack (output) - * \param d_props Cell property to pack - * \param d_send_idx Cell indexes to pack into the buffer - * \param op Pack operator - * \param num_send Number of cells to pack - * \param block_size Number of threads per block - * - * \tparam T Type of data to pack (inferred) - * \tparam PackOpT Pack operation type (inferred) - * - * \b Implementation details: - * Using one thread per cell to pack, each cell to send has its properties read - * and packed into the send buffer. - */ -template -__global__ void pack_cell_buffer(typename PackOpT::element* d_send_buf, - const T* d_props, - const unsigned int* d_send_idx, - const PackOpT op, - const unsigned int num_send) - { - // one thread per buffer cell - unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= num_send) - return; - - const unsigned int send_idx = d_send_idx[idx]; - d_send_buf[idx] = op.pack(d_props[send_idx]); - } - -//! Kernel to unpack cell communication buffer -/*! - * \param d_props Cell property array to unpack into - * \param d_cells List of unique cells to unpack - * \param d_recv Map of unique cells onto the receive buffer - * \param d_recv_begin Beginning index into \a d_recv for cells to unpack - * \param d_recv_end End index (exclusive) into \a d_recv for cells to unpack - * \param d_recv_buf Received buffer from neighbor ranks - * \param op Packing operator - * \param num_cells Number of cells to unpack - * - * \tparam T Data type to unpack (inferred) - * \tparam PackOpT Pack operation type (inferred) - * - * \b Implementation details: - * Using one thread per cell to unpack, each cell iterates over the cells it - * has received, and applies the unpacking operator to each in turn. 
- */ -template -__global__ void unpack_cell_buffer(T* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename PackOpT::element* d_recv_buf, - const PackOpT op, - const unsigned int num_cells) - { - // one thread per particle - unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= num_cells) - return; - - const unsigned int cell_idx = d_cells[idx]; - const unsigned int begin = d_recv_begin[idx]; - const unsigned int end = d_recv_end[idx]; - - // loop through all received data for this cell, and unpack it iteratively - T val = d_props[cell_idx]; - for (unsigned int i = begin; i < end; ++i) - { - val = op.unpack(d_recv_buf[d_recv[i]], val); - } - - // save the accumulated unpacked value - d_props[cell_idx] = val; - } - - } // end namespace kernel - -/*! - * \param d_send_buf Send buffer to pack (output) - * \param d_props Cell property to pack - * \param d_send_idx Cell indexes to pack into the buffer - * \param op Pack operator - * \param num_send Number of cells to pack - * \param block_size Number of threads per block - * - * \tparam T Type of data to pack (inferred) - * \tparam PackOpT Pack operator type - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::pack_cell_buffer - */ -template -cudaError_t pack_cell_buffer(typename PackOpT::element* d_send_buf, - const T* d_props, - const unsigned int* d_send_idx, - const PackOpT op, - const unsigned int num_send, - unsigned int block_size) - { - // determine runtime block size - unsigned int max_block_size; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::pack_cell_buffer); - max_block_size = attr.maxThreadsPerBlock; - - const unsigned int run_block_size = min(block_size, max_block_size); - - dim3 grid(num_send / run_block_size + 1); - mpcd::gpu::kernel::pack_cell_buffer<<>>(d_send_buf, - d_props, - d_send_idx, - op, - num_send); - - return cudaSuccess; - } - -/*! 
- * \param d_props Cell property array to unpack into - * \param d_cells List of unique cells to unpack - * \param d_recv Map of unique cells onto the receive buffer - * \param d_recv_begin Beginning index into \a d_recv for cells to unpack - * \param d_recv_end End index (exclusive) into \a d_recv for cells to unpack - * \param d_recv_buf Received buffer from neighbor ranks - * \param op Packing operator - * \param num_cells Number of cells to unpack - * - * \tparam T Data type to unpack (inferred) - * \tparam PackOpT Pack operation type (inferred) - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::unpack_cell_buffer - */ -template -cudaError_t unpack_cell_buffer(T* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename PackOpT::element* d_recv_buf, - const PackOpT op, - const unsigned int num_cells, - const unsigned int block_size) - { - // determine runtime block size - unsigned int max_block_size; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::unpack_cell_buffer); - max_block_size = attr.maxThreadsPerBlock; - - const unsigned int run_block_size = min(block_size, max_block_size); - - dim3 grid(num_cells / run_block_size + 1); - mpcd::gpu::kernel::unpack_cell_buffer<<>>(d_props, - d_cells, - d_recv, - d_recv_begin, - d_recv_end, - d_recv_buf, - op, - num_cells); - - return cudaSuccess; - } -#endif // __HIPCC__ - - } // end namespace gpu - } // end namespace mpcd - } // end namespace hoomd - -#endif // MPCD_CELL_COMMUNICATOR_CUH_ diff --git a/hoomd/mpcd/CellCommunicator.h b/hoomd/mpcd/CellCommunicator.h deleted file mode 100644 index ea78f0f8d8..0000000000 --- a/hoomd/mpcd/CellCommunicator.h +++ /dev/null @@ -1,437 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellCommunicator.h - * \brief Declares and defines the mpcd::CellCommunicator class - */ - -#ifdef ENABLE_MPI - -#ifndef MPCD_CELL_COMMUNICATOR_H_ -#define MPCD_CELL_COMMUNICATOR_H_ - -#ifdef __HIPCC__ -#error This header cannot be compiled by nvcc -#endif - -#ifdef ENABLE_HIP -#include "CellCommunicator.cuh" -#endif // ENABLE_HIP - -#include "CellList.h" -#include "CommunicatorUtilities.h" - -#include "hoomd/Autotuned.h" -#include "hoomd/DomainDecomposition.h" -#include "hoomd/GPUArray.h" -#include "hoomd/GPUVector.h" -#include "hoomd/HOOMDMath.h" -#include "hoomd/Index1D.h" -#include "hoomd/SystemDefinition.h" - -#include -#include - -namespace hoomd - { -namespace mpcd - { -//! Communicates properties across the MPCD cell list -class PYBIND11_EXPORT CellCommunicator : public Autotuned - { - public: - //! Constructor - CellCommunicator(std::shared_ptr sysdef, std::shared_ptr cl); - - //! Destructor - virtual ~CellCommunicator(); - - //! Reduce cell list properties - /*! - * \param props Properties to reduce - * \param op Binary reduction operator - * - * Properties are reduced across domain boundaries, as marked in the cell list. - */ - template void communicate(const GPUArray& props, const PackOpT op) - { - begin(props, op); - finalize(props, op); - } - - //! Begin communication of the grid - template void begin(const GPUArray& props, const PackOpT op); - - //! Finalize communication of the grid - template void finalize(const GPUArray& props, const PackOpT op); - - //! 
Get the number of unique cells with communication - unsigned int getNCells() - { - if (m_needs_init) - { - initialize(); - m_needs_init = false; - } - return m_num_cells; - } - - //! Get the list of unique cells with communication - const GPUArray& getCells() - { - if (m_needs_init) - { - initialize(); - m_needs_init = false; - } - return m_cells; - } - - private: - static unsigned int num_instances; //!< Number of communicator instances - const unsigned int m_id; //!< Id for this communicator to use in tags - - std::shared_ptr m_sysdef; //!< System definition - std::shared_ptr m_pdata; //!< HOOMD particle data - std::shared_ptr m_exec_conf; //!< Execution configuration - const MPI_Comm m_mpi_comm; //!< MPI Communicator - std::shared_ptr m_decomposition; //!< Domain decomposition - - std::shared_ptr m_cl; //!< MPCD cell list - - bool m_communicating; //!< Flag if communication is occurring - GPUVector m_send_buf; //!< Send buffer - GPUVector m_recv_buf; //!< Receive buffer - GPUArray m_send_idx; //!< Indexes of cells in send buffer - std::vector m_reqs; //!< MPI request objects - - std::vector m_neighbors; //!< Unique neighbor ranks - std::vector m_begin; //!< Begin offset of every neighbor - std::vector m_num_send; //!< Number of cells to send to every neighbor - - unsigned int m_num_cells; //!< Number of unique cells to receive - GPUArray m_cells; //!< Unique cells to receive - GPUArray - m_recv; //!< Reordered mapping of buffer from ranks to group received cells together - GPUArray m_recv_begin; //!< Begin offset of every unique cell - GPUArray m_recv_end; //!< End offset of every unique cell - - bool m_needs_init; //!< Flag if grid needs to be initialized - //! Slot that communicator needs to be reinitialized - void slotInit() - { - m_needs_init = true; - } - - //! Initialize the grid - void initialize(); - - //! Packs the property buffer - template void packBuffer(const GPUArray& props, const PackOpT op); - - //! Unpacks the property buffer - template - void unpackBuffer(const GPUArray& props, const PackOpT op); - -#ifdef ENABLE_HIP - std::shared_ptr> m_tuner_pack; //!< Tuner for pack kernel - std::shared_ptr> m_tuner_unpack; //!< Tuner for unpack kernel - - //! Packs the property buffer on the GPU - template - void packBufferGPU(const GPUArray& props, const PackOpT op); - - //! Unpacks the property buffer on the GPU - template - void unpackBufferGPU(const GPUArray& props, const PackOpT op); -#endif // ENABLE_HIP - }; - - } // end namespace mpcd - -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the send buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The data in \a props is packed into the send buffers, and nonblocking MPI - * send / receive operations are initiated. If communication is already occurring, - * the method returns immediately and no action is taken. 
- */ -template -void mpcd::CellCommunicator::begin(const GPUArray& props, const PackOpT op) - { - // check if communication is occurring and place a lock on - if (m_communicating) - return; - m_communicating = true; - - // ensure that the property grid is sufficiently sized compared to the cell list - if (props.getNumElements() < m_cl->getNCells()) - { - m_exec_conf->msg->error() - << "mpcd: cell property to be reduced is smaller than cell list dimensions" - << std::endl; - throw std::runtime_error("MPCD cell property has insufficient dimensions"); - } - - // initialize grid if required - if (m_needs_init) - { - initialize(); - m_needs_init = false; - } - - // resize send / receive buffers for this comm element - m_send_buf.resize(m_send_idx.getNumElements() * sizeof(typename PackOpT::element)); - m_recv_buf.resize(m_send_idx.getNumElements() * sizeof(typename PackOpT::element)); - -#ifdef ENABLE_HIP - if (m_exec_conf->isCUDAEnabled()) - { - packBufferGPU(props, op); - } - else -#endif // ENABLE_HIP - { - packBuffer(props, op); - } - - // make the MPI calls - { - // determine whether to use CPU or GPU CUDA buffers - access_location::Enum mpi_loc; - { - mpi_loc = access_location::host; - } - - ArrayHandle h_send_buf(m_send_buf, mpi_loc, access_mode::read); - ArrayHandle h_recv_buf(m_recv_buf, mpi_loc, access_mode::overwrite); - typename PackOpT::element* send_buf - = reinterpret_cast(h_send_buf.data); - typename PackOpT::element* recv_buf - = reinterpret_cast(h_recv_buf.data); - - m_reqs.resize(2 * m_neighbors.size()); - for (unsigned int idx = 0; idx < m_neighbors.size(); ++idx) - { - const unsigned int neigh = m_neighbors[idx]; - const unsigned int offset = m_begin[idx]; - const size_t num_bytes = sizeof(typename PackOpT::element) * m_num_send[idx]; - MPI_Isend(send_buf + offset, - (unsigned int)num_bytes, - MPI_BYTE, - neigh, - 0, - m_mpi_comm, - &m_reqs[2 * idx]); - MPI_Irecv(recv_buf + offset, - (unsigned int)num_bytes, - MPI_BYTE, - neigh, - 0, - m_mpi_comm, - &m_reqs[2 * idx + 1]); - } - } - } - -//! Finalize communication of the grid -/*! - * \param props Property buffer to unpack - * \param op Packing operator to apply to the receive buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * Existing MPI requests are finalized, and then the received buffers are - * unpacked into \a props. If no communication is ongoing, the method immediately - * returns and takes no action. - */ -template -void mpcd::CellCommunicator::finalize(const GPUArray& props, const PackOpT op) - { - if (!m_communicating) - return; - - // finish all MPI requests - MPI_Waitall((unsigned int)m_reqs.size(), m_reqs.data(), MPI_STATUSES_IGNORE); - -// unpack the buffer -#ifdef ENABLE_HIP - if (m_exec_conf->isCUDAEnabled()) - { - unpackBufferGPU(props, op); - } - else -#endif // ENABLE_HIP - { - unpackBuffer(props, op); - } - - m_communicating = false; - } - -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the send buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator applies a unary pack functor to \a props, which permits - * a reduction or transformation of the data to be sent. See mpcd::detail::CellEnergyPackOp - * for an example. - * - * \post Communicated cells in \a props are packed into \a m_send_buf. 
- */ -template -void mpcd::CellCommunicator::packBuffer(const GPUArray& props, const PackOpT op) - { - ArrayHandle h_props(props, access_location::host, access_mode::read); - ArrayHandle h_send_idx(m_send_idx, access_location::host, access_mode::read); - ArrayHandle h_send_buf(m_send_buf, - access_location::host, - access_mode::overwrite); - typename PackOpT::element* send_buf - = reinterpret_cast(h_send_buf.data); - - for (unsigned int idx = 0; idx < m_send_idx.getNumElements(); ++idx) - { - send_buf[idx] = op.pack(h_props.data[h_send_idx.data[idx]]); - } - } - -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the received buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator must be associative so that the order it is applied - * to the received cells does not matter, e.g., addition. - * - * \post The bytes from \a m_recv_buf are unpacked into \a props. - */ -template -void mpcd::CellCommunicator::unpackBuffer(const GPUArray& props, const PackOpT op) - { - ArrayHandle h_recv(m_recv, access_location::host, access_mode::read); - ArrayHandle h_cells(m_cells, access_location::host, access_mode::read); - ArrayHandle h_recv_begin(m_recv_begin, access_location::host, access_mode::read); - ArrayHandle h_recv_end(m_recv_end, access_location::host, access_mode::read); - - ArrayHandle h_recv_buf(m_recv_buf, access_location::host, access_mode::read); - typename PackOpT::element* recv_buf - = reinterpret_cast(h_recv_buf.data); - - ArrayHandle h_props(props, access_location::host, access_mode::readwrite); - - for (unsigned int idx = 0; idx < m_num_cells; ++idx) - { - const unsigned int cell_idx = h_cells.data[idx]; - const unsigned int begin = h_recv_begin.data[idx]; - const unsigned int end = h_recv_end.data[idx]; - - // loop through all received data for this cell, and unpack it iteratively - T val = h_props.data[cell_idx]; - for (unsigned int i = begin; i < end; ++i) - { - val = op.unpack(recv_buf[h_recv.data[i]], val); - } - - // save the accumulated unpacked value - h_props.data[cell_idx] = val; - } - } - -#ifdef ENABLE_HIP -/*! - * \param props Property buffer to pack - * \param op Packing operator to apply to the send buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator applies a unary pack functor to \a props, which permits - * a reduction or transformation of the data to be sent. See mpcd::detail::CellEnergyPackOp - * for an example. - * - * \post Communicated cells in \a props are packed into \a m_send_buf. - */ -template -void mpcd::CellCommunicator::packBufferGPU(const GPUArray& props, const PackOpT op) - { - ArrayHandle d_props(props, access_location::device, access_mode::read); - ArrayHandle d_send_idx(m_send_idx, access_location::device, access_mode::read); - ArrayHandle d_send_buf(m_send_buf, - access_location::device, - access_mode::overwrite); - typename PackOpT::element* send_buf - = reinterpret_cast(d_send_buf.data); - - m_tuner_pack->begin(); - mpcd::gpu::pack_cell_buffer(send_buf, - d_props.data, - d_send_idx.data, - op, - (unsigned int)m_send_idx.getNumElements(), - m_tuner_pack->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_tuner_pack->end(); - } - -/*! 
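Note (not part of the patch): the pack/unpack templates in the deleted CellCommunicator expect an operator type that exposes a nested element type, a unary pack(), and an associative binary unpack(). The diff references mpcd::detail::CellEnergyPackOp but does not show it, so the sketch below uses a hypothetical SumPackOp purely to illustrate the interface those templates call; it is not HOOMD code.

#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical operator matching the interface used by packBuffer()/unpackBuffer():
// a nested element type, pack() to transform an outgoing value, and an associative
// unpack() to fold a received element into the local value.
struct SumPackOp
    {
    typedef double element;

    element pack(double value) const
        {
        return value; // send the raw value
        }

    double unpack(element received, double current) const
        {
        return current + received; // associative reduction across neighbor ranks
        }
    };

int main()
    {
    SumPackOp op;
    std::vector<double> local = {1.0, 2.0, 3.0};       // local cell property
    std::vector<double> received = {0.5, 0.25, 0.125}; // values received for the same cells

    for (std::size_t i = 0; i < local.size(); ++i)
        {
        local[i] = op.unpack(op.pack(received[i]), local[i]);
        }

    std::printf("%g %g %g\n", local[0], local[1], local[2]); // 1.5 2.25 3.125
    return 0;
    }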
- * \param props Property buffer to pack - * \param op Packing operator to apply to the received buffer - * - * \tparam T Type of buffer to pack (inferred) - * \tparam PackOpT Packing operator type (inferred) - * - * The packing operator must be associative so that the order it is applied - * to the received cells does not matter, e.g., addition. - * - * \post The bytes from \a m_recv_buf are unpacked into \a props. - */ -template -void mpcd::CellCommunicator::unpackBufferGPU(const GPUArray& props, const PackOpT op) - { - ArrayHandle d_recv(m_recv, access_location::device, access_mode::read); - ArrayHandle d_cells(m_cells, access_location::device, access_mode::read); - ArrayHandle d_recv_begin(m_recv_begin, - access_location::device, - access_mode::read); - ArrayHandle d_recv_end(m_recv_end, access_location::device, access_mode::read); - - ArrayHandle d_recv_buf(m_recv_buf, access_location::device, access_mode::read); - typename PackOpT::element* recv_buf - = reinterpret_cast(d_recv_buf.data); - - ArrayHandle d_props(props, access_location::device, access_mode::readwrite); - - m_tuner_unpack->begin(); - mpcd::gpu::unpack_cell_buffer(d_props.data, - d_cells.data, - d_recv.data, - d_recv_begin.data, - d_recv_end.data, - recv_buf, - op, - m_num_cells, - m_tuner_unpack->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_tuner_unpack->end(); - } -#endif // ENABLE_HIP - - } // end namespace hoomd - -#endif // MPCD_CELL_COMMUNICATOR_H_ - -#endif // ENABLE_MPI diff --git a/hoomd/mpcd/CellList.cc b/hoomd/mpcd/CellList.cc index 58bacdd3a1..067c133c3c 100644 --- a/hoomd/mpcd/CellList.cc +++ b/hoomd/mpcd/CellList.cc @@ -18,14 +18,17 @@ namespace hoomd { mpcd::CellList::CellList(std::shared_ptr sysdef, Scalar cell_size, bool shift) - : Compute(sysdef), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_cell_np_max(4), - m_cell_np(m_exec_conf), m_cell_list(m_exec_conf), m_embed_cell_ids(m_exec_conf), - m_conditions(m_exec_conf), m_needs_compute_dim(true), m_particles_sorted(false), - m_virtual_change(false) + : Compute(sysdef), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_cell_np(m_exec_conf), + m_embed_cell_ids(m_exec_conf), m_conditions(m_exec_conf), m_cell_vel(m_exec_conf), + m_cell_energy(m_exec_conf), m_cell_temp(m_exec_conf), m_needs_net_reduce(true), + m_needs_compute_dim(true), m_particles_sorted(false), m_virtual_change(false) { assert(m_mpcd_pdata); m_exec_conf->msg->notice(5) << "Constructing MPCD CellList" << std::endl; + GPUArray net_properties(mpcd::detail::thermo_index::num_quantities, m_exec_conf); + m_net_properties.swap(net_properties); + setCellSize(cell_size); m_origin_idx = make_int3(0, 0, 0); m_cell_dim = make_uint3(0, 0, 0); @@ -41,7 +44,6 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, Scalar cell_s m_cover_box = m_pdata->getBox(); #endif // ENABLE_MPI - m_mpcd_pdata->getSortSignal().connect(this); m_mpcd_pdata->getNumVirtualSignal().connect( this); m_pdata->getParticleSortSignal().connect(this); @@ -51,14 +53,17 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, Scalar cell_s mpcd::CellList::CellList(std::shared_ptr sysdef, const uint3& global_cell_dim, bool shift) - : Compute(sysdef), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_cell_np_max(4), - m_cell_np(m_exec_conf), m_cell_list(m_exec_conf), m_embed_cell_ids(m_exec_conf), - m_conditions(m_exec_conf), m_needs_compute_dim(true), m_particles_sorted(false), - m_virtual_change(false) + : Compute(sysdef), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_cell_np(m_exec_conf), + 
m_embed_cell_ids(m_exec_conf), m_conditions(m_exec_conf), m_cell_vel(m_exec_conf), + m_cell_energy(m_exec_conf), m_cell_temp(m_exec_conf), m_needs_compute_dim(true), + m_particles_sorted(false), m_virtual_change(false) { assert(m_mpcd_pdata); m_exec_conf->msg->notice(5) << "Constructing MPCD CellList" << std::endl; + GPUArray net_properties(mpcd::detail::thermo_index::num_quantities, m_exec_conf); + m_net_properties.swap(net_properties); + setGlobalDim(global_cell_dim); m_origin_idx = make_int3(0, 0, 0); m_cell_dim = make_uint3(0, 0, 0); @@ -74,7 +79,6 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, m_cover_box = m_pdata->getBox(); #endif // ENABLE_MPI - m_mpcd_pdata->getSortSignal().connect(this); m_mpcd_pdata->getNumVirtualSignal().connect( this); m_pdata->getParticleSortSignal().connect(this); @@ -84,7 +88,6 @@ mpcd::CellList::CellList(std::shared_ptr sysdef, mpcd::CellList::~CellList() { m_exec_conf->msg->notice(5) << "Destroying MPCD CellList" << std::endl; - m_mpcd_pdata->getSortSignal().disconnect(this); m_mpcd_pdata->getNumVirtualSignal().disconnect( this); m_pdata->getParticleSortSignal().disconnect(this); @@ -113,6 +116,9 @@ void mpcd::CellList::compute(uint64_t timestep) m_force_compute = true; } + // ensure optional computation flags are up to date + updateFlags(); + if (peekCompute(timestep)) { // ensure grid is shifted @@ -138,23 +144,15 @@ void mpcd::CellList::compute(uint64_t timestep) m_embed_cell_ids.resize(m_embed_group->getNumMembers()); } - bool overflowed = false; - do - { - buildCellList(); - - overflowed = checkConditions(); - - if (overflowed) - { - reallocate(); - resetConditions(); - } - } while (overflowed); + // bin particles and compute cell properties + buildCellList(); + checkConditions(); + finishComputeProperties(); // we are finished building, explicitly mark everything (rather than using shouldCompute) m_first_compute = false; m_force_compute = false; + m_needs_net_reduce = true; m_last_computed = timestep; // signal to the ParticleData that the cell list cache is now valid @@ -164,11 +162,10 @@ void mpcd::CellList::compute(uint64_t timestep) void mpcd::CellList::reallocate() { - m_exec_conf->msg->notice(6) << "Allocating MPCD cell list, " << m_cell_np_max - << " particles in " << m_cell_indexer.getNumElements() << " cells." - << std::endl; - m_cell_list_indexer = Index2D(m_cell_np_max, m_cell_indexer.getNumElements()); - m_cell_list.resize(m_cell_list_indexer.getNumElements()); + m_cell_np.resize(m_cell_indexer.getNumElements()); + m_cell_vel.resize(m_cell_indexer.getNumElements()); + m_cell_energy.resize(m_cell_indexer.getNumElements()); + m_cell_temp.resize(m_cell_indexer.getNumElements()); } void mpcd::CellList::computeDimensions() @@ -307,7 +304,6 @@ void mpcd::CellList::computeDimensions() // resize the cell indexers and per-cell counter m_global_cell_indexer = Index3D(m_global_cell_dim.x, m_global_cell_dim.y, m_global_cell_dim.z); m_cell_indexer = Index3D(m_cell_dim.x, m_cell_dim.y, m_cell_dim.z); - m_cell_np.resize(m_cell_indexer.getNumElements()); // reallocate per-cell memory reallocate(); @@ -317,6 +313,21 @@ void mpcd::CellList::computeDimensions() notifySizeChange(); } +void mpcd::CellList::startAutotuning() + { + Compute::startAutotuning(); +#ifdef ENABLE_MPI +#endif // ENABLE_MPI + } + +bool mpcd::CellList::isAutotuningComplete() + { + bool result = Compute::isAutotuningComplete(); +#ifdef ENABLE_MPI +#endif // ENABLE_MPI + return result; + } + #ifdef ENABLE_MPI /*! 
* \param dir Direction of communication @@ -352,12 +363,17 @@ void mpcd::CellList::buildCellList() const BoxDim& box = m_pdata->getBox(); const uchar3 periodic = box.getPeriodic(); - ArrayHandle h_cell_list(m_cell_list, - access_location::host, - access_mode::overwrite); - ArrayHandle h_cell_np(m_cell_np, access_location::host, access_mode::overwrite); // zero the cell counter m_cell_np.zeroFill(); + m_cell_vel.zeroFill(); + if (m_flags[mpcd::detail::thermo_options::energy]) + { + m_cell_energy.zeroFill(); + } + + ArrayHandle h_cell_np(m_cell_np, access_location::host, access_mode::readwrite); + ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::readwrite); + ArrayHandle h_cell_energy(m_cell_energy, access_location::host, access_mode::readwrite); uint3 conditions = make_uint3(0, 0, 0); @@ -373,6 +389,7 @@ void mpcd::CellList::buildCellList() // we can't modify the velocity of embedded particles, so we only read their position std::unique_ptr> h_embed_cell_ids; std::unique_ptr> h_pos_embed; + std::unique_ptr> h_vel_embed; std::unique_ptr> h_embed_member_idx; if (m_embed_group) { @@ -382,6 +399,9 @@ void mpcd::CellList::buildCellList() h_pos_embed.reset(new ArrayHandle(m_pdata->getPositions(), access_location::host, access_mode::read)); + h_vel_embed.reset(new ArrayHandle(m_pdata->getVelocities(), + access_location::host, + access_mode::read)); h_embed_member_idx.reset(new ArrayHandle(m_embed_group->getIndexArray(), access_location::host, access_mode::read)); @@ -405,15 +425,22 @@ void mpcd::CellList::buildCellList() for (unsigned int cur_p = 0; cur_p < N_tot; ++cur_p) { Scalar4 postype_i; + Scalar4 vel_mass_i; + double mass_i; if (cur_p < N_mpcd) { postype_i = h_pos.data[cur_p]; + vel_mass_i = h_vel.data[cur_p]; + mass_i = m_mpcd_pdata->getMass(); } else { postype_i = h_pos_embed->data[h_embed_member_idx->data[cur_p - N_mpcd]]; + vel_mass_i = h_vel_embed->data[h_embed_member_idx->data[cur_p - N_mpcd]]; + mass_i = vel_mass_i.w; } - Scalar3 pos_i = make_scalar3(postype_i.x, postype_i.y, postype_i.z); + const Scalar3 pos_i = make_scalar3(postype_i.x, postype_i.y, postype_i.z); + const double3 vel_i = make_double3(vel_mass_i.x, vel_mass_i.y, vel_mass_i.z); if (std::isnan(pos_i.x) || std::isnan(pos_i.y) || std::isnan(pos_i.z)) { @@ -488,16 +515,6 @@ void mpcd::CellList::buildCellList() } unsigned int bin_idx = m_cell_indexer(bin.x, bin.y, bin.z); - unsigned int offset = h_cell_np.data[bin_idx]; - if (offset < m_cell_np_max) - { - h_cell_list.data[m_cell_list_indexer(offset, bin_idx)] = cur_p; - } - else - { - // overflow - conditions.x = std::max(conditions.x, offset + 1); - } // stash the current particle bin into the velocity array if (cur_p < N_mpcd) @@ -509,6 +526,18 @@ void mpcd::CellList::buildCellList() h_embed_cell_ids->data[cur_p - N_mpcd] = bin_idx; } + // compute the contribution of the particle to cell velocity + double4& cell_vel = h_cell_vel.data[bin_idx]; + cell_vel.x += mass_i * vel_i.x; + cell_vel.y += mass_i * vel_i.y; + cell_vel.z += mass_i * vel_i.z; + cell_vel.w += mass_i; + // compute optional cell properties + if (m_flags[mpcd::detail::thermo_options::energy]) + { + h_cell_energy.data[bin_idx] + += 0.5 * mass_i * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z); + } // increment the counter always ++h_cell_np.data[bin_idx]; } @@ -517,53 +546,136 @@ void mpcd::CellList::buildCellList() m_conditions.resetFlags(conditions); } -/*! 
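Note (not part of the patch): the rewritten buildCellList() above folds what CellThermoCompute used to do into the binning loop; each particle adds mass-weighted momentum (and, when the energy flag is set, kinetic energy) into its cell while the per-cell counter is incremented. A minimal standalone sketch of that accumulation pattern, with simplified types and a hypothetical precomputed bin index in place of the real cell indexer:

#include <cstdio>
#include <vector>

struct Particle
    {
    double vx, vy, vz;
    double mass;
    unsigned int bin; // cell index, assumed precomputed for this sketch
    };

int main()
    {
    const unsigned int ncells = 2;
    std::vector<Particle> particles
        = {{1.0, 0.0, 0.0, 1.0, 0}, {-1.0, 0.0, 0.0, 1.0, 0}, {0.0, 2.0, 0.0, 2.0, 1}};

    // per-cell accumulators: momentum + mass (like the double4 m_cell_vel) and kinetic energy
    std::vector<double> mom_x(ncells, 0.0), mom_y(ncells, 0.0), mom_z(ncells, 0.0);
    std::vector<double> cell_mass(ncells, 0.0), cell_energy(ncells, 0.0);
    std::vector<unsigned int> cell_np(ncells, 0);

    for (const Particle& p : particles)
        {
        mom_x[p.bin] += p.mass * p.vx;
        mom_y[p.bin] += p.mass * p.vy;
        mom_z[p.bin] += p.mass * p.vz;
        cell_mass[p.bin] += p.mass;
        cell_energy[p.bin] += 0.5 * p.mass * (p.vx * p.vx + p.vy * p.vy + p.vz * p.vz);
        ++cell_np[p.bin];
        }

    for (unsigned int c = 0; c < ncells; ++c)
        {
        std::printf("cell %u: np=%u mass=%g energy=%g\n", c, cell_np[c], cell_mass[c], cell_energy[c]);
        }
    return 0;
    }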
- * \param timestep Timestep that the sorting occurred - * \param order Mapping of sorted particle indexes onto old particle indexes - * \param rorder Mapping of old particle indexes onto sorted particle indexes - */ -void mpcd::CellList::sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder) +void mpcd::CellList::finishComputeProperties() { - // no need to do any sorting if we can still be called at the current timestep - if (peekCompute(timestep)) - return; - - // if mapping is not valid, signal that we need to force a recompute next time - // that the cell list is needed. We don't call forceCompute() directly because this - // always runs compute(), and we just want to defer to the next compute() call. - if (rorder.isNull()) + const bool need_energy = m_flags[mpcd::detail::thermo_options::energy]; + if (need_energy) { - m_force_compute = true; - return; + m_cell_temp.zeroFill(); } - // iterate through particles in cell list, and update their indexes using reverse - // mapping - ArrayHandle h_rorder(rorder, access_location::host, access_mode::read); ArrayHandle h_cell_np(m_cell_np, access_location::host, access_mode::read); - ArrayHandle h_cell_list(m_cell_list, - access_location::host, - access_mode::readwrite); - const unsigned int N_mpcd = m_mpcd_pdata->getN(); - + ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::readwrite); + ArrayHandle h_cell_energy(m_cell_energy, access_location::host, access_mode::read); + ArrayHandle h_cell_temp(m_cell_temp, access_location::host, access_mode::overwrite); + // iterate over all cells and compute average velocity, energy, temperature for (unsigned int idx = 0; idx < getNCells(); ++idx) { - const unsigned int np = h_cell_np.data[idx]; - for (unsigned int offset = 0; offset < np; ++offset) + // average cell properties if the cell has mass + const double4 cell_vel = h_cell_vel.data[idx]; + double3 vel_cm = make_double3(cell_vel.x, cell_vel.y, cell_vel.z); + const double mass = cell_vel.w; + + // get center of mass velocity from momentum + if (mass > 0.) + { + // average velocity is only defined when there is some mass in the cell + vel_cm.x /= mass; + vel_cm.y /= mass; + vel_cm.z /= mass; + } + h_cell_vel.data[idx] = make_double4(vel_cm.x, vel_cm.y, vel_cm.z, mass); + + if (need_energy) { - const unsigned int cl_idx = m_cell_list_indexer(offset, idx); - const unsigned int pid = h_cell_list.data[cl_idx]; - // only update indexes of MPCD particles, not virtual or embedded particles - if (pid < N_mpcd) + const double cell_energy = h_cell_energy.data[idx]; + double temp(0.0); + const unsigned int np = h_cell_np.data[idx]; + // temperature is only defined for 2 or more particles + if (np > 1) { - h_cell_list.data[cl_idx] = h_rorder.data[pid]; + const double ke_cm + = 0.5 * mass + * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z); + temp = 2. 
* (cell_energy - ke_cm) / (m_sysdef->getNDimensions() * (np - 1)); } + h_cell_temp.data[idx] = temp; } } } +void mpcd::CellList::computeNetProperties() + { + unsigned int n_temp_cells = 0; + { + const bool need_energy = m_flags[mpcd::detail::thermo_options::energy]; + ArrayHandle h_cell_np(m_cell_np, access_location::host, access_mode::read); + ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::read); + ArrayHandle h_cell_energy(m_cell_energy, access_location::host, access_mode::read); + ArrayHandle h_cell_temp(m_cell_temp, access_location::host, access_mode::read); + double3 net_momentum = make_double3(0, 0, 0); + double net_energy(0.0), net_temp(0.0); + + // iterate over all cells and compute average velocity, energy, temperature + for (unsigned int idx = 0; idx < getNCells(); ++idx) + { + // average cell properties if the cell has mass + const double4 cell_vel = h_cell_vel.data[idx]; + const double3 vel_cm = make_double3(cell_vel.x, cell_vel.y, cell_vel.z); + const double mass = cell_vel.w; + + // add accumulated momentum to total net momentum + net_momentum.x += vel_cm.x * mass; + net_momentum.y += vel_cm.y * mass; + net_momentum.z += vel_cm.z * mass; + + if (need_energy) + { + const double cell_energy = h_cell_energy.data[idx]; + const double cell_temp = h_cell_temp.data[idx]; + // temperature is only defined for 2 or more particles + if (h_cell_np.data[idx] > 1) + { + ++n_temp_cells; + } + // accumulate + net_energy += cell_energy; + net_temp += cell_temp; + } + } + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::overwrite); + + h_net_properties.data[mpcd::detail::thermo_index::momentum_x] = net_momentum.x; + h_net_properties.data[mpcd::detail::thermo_index::momentum_y] = net_momentum.y; + h_net_properties.data[mpcd::detail::thermo_index::momentum_z] = net_momentum.z; + h_net_properties.data[mpcd::detail::thermo_index::energy] = net_energy; + h_net_properties.data[mpcd::detail::thermo_index::temperature] = net_temp; + } + +#ifdef ENABLE_MPI + if (m_decomposition) + { + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::readwrite); + MPI_Allreduce(MPI_IN_PLACE, + h_net_properties.data, + mpcd::detail::thermo_index::num_quantities, + MPI_DOUBLE, + MPI_SUM, + m_exec_conf->getMPICommunicator()); + + MPI_Allreduce(MPI_IN_PLACE, + &n_temp_cells, + 1, + MPI_UNSIGNED, + MPI_SUM, + m_exec_conf->getMPICommunicator()); + } +#endif // ENABLE_MPI + if (n_temp_cells > 0) + { + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::readwrite); + h_net_properties.data[mpcd::detail::thermo_index::temperature] /= (double)n_temp_cells; + } + // ensure that properties will not be normalized twice + m_needs_net_reduce = false; + } + #ifdef ENABLE_MPI bool mpcd::CellList::needsEmbedMigrate(uint64_t timestep) { @@ -609,17 +721,24 @@ bool mpcd::CellList::needsEmbedMigrate(uint64_t timestep) } #endif // ENABLE_MPI +void mpcd::CellList::updateFlags() + { + mpcd::detail::ThermoFlags flags; + + if (!m_flag_signal.empty()) + { + m_flag_signal.emit_accumulate([&](mpcd::detail::ThermoFlags f) { flags |= f; }); + } + + m_flags = flags; + } + bool mpcd::CellList::checkConditions() { bool result = false; uint3 conditions = m_conditions.readFlags(); - if (conditions.x > m_cell_np_max) - { - m_cell_np_max = conditions.x; - result = true; - } if (conditions.y) { unsigned int n = conditions.y - 1; diff --git a/hoomd/mpcd/CellList.h b/hoomd/mpcd/CellList.h index f63c37d40f..644c9ff273 100644 --- 
a/hoomd/mpcd/CellList.h +++ b/hoomd/mpcd/CellList.h @@ -13,6 +13,7 @@ #error This header cannot be compiled by nvcc #endif +#include "CellThermoTypes.h" #include "CommunicatorUtilities.h" #include "ParticleData.h" @@ -48,11 +49,11 @@ class PYBIND11_EXPORT CellList : public Compute //! Sizes the cell list based on the box void computeDimensions(); - //! Get the cell list data - const GPUArray& getCellList() const - { - return m_cell_list; - } + //! Start autotuning kernel launch parameters + void startAutotuning() override; + + //! Check if kernel autotuning is complete + bool isAutotuningComplete() override; //! Get the number of particles per cell const GPUArray& getCellSizeArray() const @@ -60,6 +61,24 @@ class PYBIND11_EXPORT CellList : public Compute return m_cell_np; } + //! Get the cell velocities from the last call to compute + const GPUArray& getCellVelocities() const + { + return m_cell_vel; + } + + //! Get the cell energies from the last call to compute + const GPUArray& getCellEnergies() const + { + return m_cell_energy; + } + + //! Get the cell temperature from the last call to compute + const GPUArray& getCellTemperature() const + { + return m_cell_temp; + } + //! Get the total number of cells in the list const unsigned int getNCells() const { @@ -78,12 +97,6 @@ class PYBIND11_EXPORT CellList : public Compute return m_global_cell_indexer; } - //! Get the cell list indexer - const Index2D& getCellListIndexer() const - { - return m_cell_list_indexer; - } - //! Get the number of cells in each dimension const uint3& getDim() const { @@ -112,12 +125,6 @@ class PYBIND11_EXPORT CellList : public Compute //! Wrap a cell into a global cell const int3 wrapGlobalCell(const int3& cell); - //! Get the maximum number of particles in a cell - const unsigned int getNmax() const - { - return m_cell_np_max; - } - //! Get the MPCD cell size (deprecated) Scalar3 getCellSize(); @@ -242,6 +249,71 @@ class PYBIND11_EXPORT CellList : public Compute { return m_dim_signal; } + //! Get the net momentum of the particles from the last call to compute + Scalar3 getNetMomentum() + { + if (m_needs_net_reduce) + computeNetProperties(); + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::read); + const Scalar3 net_momentum + = make_scalar3(h_net_properties.data[mpcd::detail::thermo_index::momentum_x], + h_net_properties.data[mpcd::detail::thermo_index::momentum_y], + h_net_properties.data[mpcd::detail::thermo_index::momentum_z]); + return net_momentum; + } + + //! Get the net energy of the particles from the last call to compute + Scalar getNetEnergy() + { + if (!m_flags[mpcd::detail::thermo_options::energy]) + { + m_exec_conf->msg->error() + << "Energy requested from CellList, but was not computed." << std::endl; + throw std::runtime_error("Net cell energy not available"); + } + if (m_needs_net_reduce) + computeNetProperties(); + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::read); + return h_net_properties.data[mpcd::detail::thermo_index::energy]; + } + + //! Get the average cell temperature from the last call to compute + Scalar getTemperature() + { + if (!m_flags[mpcd::detail::thermo_options::energy]) + { + m_exec_conf->msg->error() + << "Temperature requested from CellList, but was not computed." 
<< std::endl; + throw std::runtime_error("Net cell temperature not available"); + } + if (m_needs_net_reduce) + computeNetProperties(); + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::read); + return h_net_properties.data[mpcd::detail::thermo_index::temperature]; + } + + //! Get the signal for requested thermo flags + /*! + * \returns A signal that subscribers can attach a callback to in order + * to request certain data. + * + * For performance reasons, the CellList should be able to + * supply many related cell-level quantities from a single kernel launch. + * However, sometimes these quantities are not needed, and it is better + * to skip calculating them. Subscribing classes can optionally request + * some of these quantities via a callback return mpcd::detail::ThermoFlags + * with the appropriate bits set. + */ + Nano::Signal& getFlagsSignal() + { + return m_flag_signal; + } protected: std::shared_ptr m_mpcd_pdata; //!< MPCD particle data @@ -259,15 +331,21 @@ class PYBIND11_EXPORT CellList : public Compute Scalar3 m_global_cell_dim_inv; //!< Inverse of number of cells in each direction of global box Index3D m_cell_indexer; //!< Indexer from 3D into cell list 1D Index3D m_global_cell_indexer; //!< Indexer from 3D into 1D for global cell indexes - Index2D m_cell_list_indexer; //!< Indexer into cell list members - unsigned int m_cell_np_max; //!< Maximum number of particles per cell GPUVector m_cell_np; //!< Number of particles per cell - GPUVector m_cell_list; //!< Cell list of particles GPUVector m_embed_cell_ids; //!< Cell ids of the embedded particles GPUFlags m_conditions; //!< Detect conditions that might fail building cell list int3 m_origin_idx; //!< Origin as a global index + GPUVector m_cell_vel; //!< Average velocity of a cell + cell mass + GPUVector m_cell_energy; //!< Kinetic energy + GPUVector m_cell_temp; //!< Unscaled temperature + GPUArray m_net_properties; //!< Scalar properties of the system + bool m_needs_net_reduce; //!< Flag if a net reduction is necessary + + Nano::Signal m_flag_signal; //!< Signal for requested flags + mpcd::detail::ThermoFlags m_flags; //!< Requested thermo flags + #ifdef ENABLE_MPI unsigned int m_num_extra; //!< Number of extra cells to communicate over std::array m_num_comm; //!< Number of cells to communicate on each face @@ -277,6 +355,9 @@ class PYBIND11_EXPORT CellList : public Compute virtual bool needsEmbedMigrate(uint64_t timestep); #endif // ENABLE_MPI + //! Updates the requested optional flags + void updateFlags(); + //! Check the condition flags bool checkConditions(); @@ -286,10 +367,11 @@ class PYBIND11_EXPORT CellList : public Compute //! Builds the cell list and handles cell list memory virtual void buildCellList(); - //! Callback to sort cell list when particle data is sorted - virtual void sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder); + //! Do final cell property calculation + virtual void finishComputeProperties(); + + //! 
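Note (not part of the patch): the new getFlagsSignal() accessor lets consumers opt in to the optional per-cell quantities before compute() runs, and updateFlags() ORs together every callback's ThermoFlags. The real signal uses HOOMD's Nano::Signal, which is not reproduced here; the sketch below models the same opt-in pattern with std::function callbacks and a hypothetical bitmask. Names like ENERGY_BIT and requested_flags are illustrative only.

#include <cstdio>
#include <functional>
#include <vector>

// Hypothetical stand-in for mpcd::detail::ThermoFlags: a plain bitmask.
enum ThermoBits : unsigned int
    {
    ENERGY_BIT = 1u << 0
    };

int main()
    {
    // subscribers register callbacks that report which optional quantities they need
    std::vector<std::function<unsigned int()>> flag_callbacks;

    // e.g., a thermostat-like consumer that needs per-cell energy/temperature
    flag_callbacks.push_back([]() { return static_cast<unsigned int>(ENERGY_BIT); });

    // e.g., a consumer that only needs cell momentum, so it requests nothing extra
    flag_callbacks.push_back([]() { return 0u; });

    // analogous to updateFlags(): accumulate all requests before building the cell list
    unsigned int requested_flags = 0;
    for (const auto& cb : flag_callbacks)
        {
        requested_flags |= cb();
        }

    const bool need_energy = (requested_flags & ENERGY_BIT) != 0;
    std::printf("energy requested: %s\n", need_energy ? "yes" : "no");
    return 0;
    }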
Compute the net properties of all the cells + virtual void computeNetProperties(); private: bool m_needs_compute_dim; //!< True if the dimensions need to be (re-)computed diff --git a/hoomd/mpcd/CellListGPU.cc b/hoomd/mpcd/CellListGPU.cc index ce51583a09..fb5d260dd9 100644 --- a/hoomd/mpcd/CellListGPU.cc +++ b/hoomd/mpcd/CellListGPU.cc @@ -7,22 +7,24 @@ */ #include "CellListGPU.h" -#include "CellListGPU.cuh" namespace hoomd { mpcd::CellListGPU::CellListGPU(std::shared_ptr sysdef, Scalar cell_size, bool shift) - : mpcd::CellList(sysdef, cell_size, shift) + : mpcd::CellList(sysdef, cell_size, shift), m_tmp_thermo(m_exec_conf), m_reduced(m_exec_conf) { m_tuner_cell.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, m_exec_conf, "mpcd_cell")); - m_tuner_sort.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_sort")); - m_autotuners.insert(m_autotuners.end(), {m_tuner_cell, m_tuner_sort}); + m_tuner_property.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, + m_exec_conf, + "mpcd_cell_property")); + m_tuner_net.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, + m_exec_conf, + "mpcd_cell_net_property")); + m_autotuners.insert(m_autotuners.end(), {m_tuner_cell, m_tuner_property, m_tuner_net}); #ifdef ENABLE_MPI m_tuner_embed_migrate.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, @@ -38,15 +40,19 @@ mpcd::CellListGPU::CellListGPU(std::shared_ptr sysdef, mpcd::CellListGPU::CellListGPU(std::shared_ptr sysdef, const uint3& global_cell_dim, bool shift) - : mpcd::CellList(sysdef, global_cell_dim, shift) + : mpcd::CellList(sysdef, global_cell_dim, shift), m_tmp_thermo(m_exec_conf), + m_reduced(m_exec_conf) { m_tuner_cell.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, m_exec_conf, "mpcd_cell")); - m_tuner_sort.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_sort")); - m_autotuners.insert(m_autotuners.end(), {m_tuner_cell, m_tuner_sort}); + m_tuner_property.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, + m_exec_conf, + "mpcd_cell_property")); + m_tuner_net.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, + m_exec_conf, + "mpcd_cell_net_property")); + m_autotuners.insert(m_autotuners.end(), {m_tuner_cell, m_tuner_property, m_tuner_net}); #ifdef ENABLE_MPI m_tuner_embed_migrate.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, @@ -63,10 +69,11 @@ mpcd::CellListGPU::~CellListGPU() { } void mpcd::CellListGPU::buildCellList() { - ArrayHandle d_cell_list(m_cell_list, - access_location::device, - access_mode::overwrite); ArrayHandle d_cell_np(m_cell_np, access_location::device, access_mode::overwrite); + ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::overwrite); + ArrayHandle d_cell_energy(m_cell_energy, + access_location::device, + access_mode::overwrite); ArrayHandle d_pos(m_mpcd_pdata->getPositions(), access_location::device, access_mode::read); @@ -97,6 +104,9 @@ void mpcd::CellListGPU::buildCellList() ArrayHandle d_pos_embed(m_pdata->getPositions(), access_location::device, access_mode::read); + ArrayHandle d_vel_embed(m_pdata->getVelocities(), + access_location::device, + access_mode::read); ArrayHandle d_embed_member_idx(m_embed_group->getIndexArray(), access_location::device, access_mode::read); @@ -104,12 +114,15 @@ void mpcd::CellListGPU::buildCellList() m_tuner_cell->begin(); 
mpcd::gpu::compute_cell_list(d_cell_np.data, - d_cell_list.data, + d_cell_vel.data, + d_cell_energy.data, m_conditions.getDeviceFlags(), d_vel.data, + m_mpcd_pdata->getMass(), d_embed_cell_ids.data, d_pos.data, d_pos_embed.data, + d_vel_embed.data, d_embed_member_idx.data, m_pdata->getBox().getPeriodic(), m_origin_idx, @@ -117,11 +130,10 @@ void mpcd::CellListGPU::buildCellList() m_pdata->getGlobalBox(), n_global_cells, m_global_cell_dim, - m_cell_np_max, m_cell_indexer, - m_cell_list_indexer, N_mpcd, N_tot, + m_flags[mpcd::detail::thermo_options::energy], m_tuner_cell->getParam()[0]); if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR(); @@ -131,24 +143,26 @@ void mpcd::CellListGPU::buildCellList() { m_tuner_cell->begin(); mpcd::gpu::compute_cell_list(d_cell_np.data, - d_cell_list.data, + d_cell_vel.data, + d_cell_energy.data, m_conditions.getDeviceFlags(), d_vel.data, + m_mpcd_pdata->getMass(), NULL, d_pos.data, NULL, NULL, + NULL, m_pdata->getBox().getPeriodic(), m_origin_idx, m_grid_shift, m_pdata->getGlobalBox(), n_global_cells, m_global_cell_dim, - m_cell_np_max, m_cell_indexer, - m_cell_list_indexer, N_mpcd, N_tot, + m_flags[mpcd::detail::thermo_options::energy], m_tuner_cell->getParam()[0]); if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR(); @@ -156,43 +170,127 @@ void mpcd::CellListGPU::buildCellList() } } -/*! - * \param timestep Timestep that the sorting occurred - * \param order Mapping of sorted particle indexes onto old particle indexes - * \param rorder Mapping of old particle indexes onto sorted particle indexes - */ -void mpcd::CellListGPU::sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder) +void mpcd::CellListGPU::finishComputeProperties() { - // no need to do any sorting if we can still be called at the current timestep - if (peekCompute(timestep)) - return; + ArrayHandle d_cell_np(m_cell_np, access_location::device, access_mode::read); + ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::readwrite); + ArrayHandle d_cell_energy(m_cell_energy, access_location::device, access_mode::read); + ArrayHandle d_cell_temp(m_cell_temp, access_location::device, access_mode::overwrite); - // force a recompute if mapping is invalid - if (rorder.isNull()) + m_tuner_property->begin(); + mpcd::gpu::finish_cell_properties(d_cell_np.data, + d_cell_vel.data, + d_cell_energy.data, + d_cell_temp.data, + getNCells(), + m_sysdef->getNDimensions(), + m_flags[mpcd::detail::thermo_options::energy], + m_tuner_property->getParam()[0]); + if (m_exec_conf->isCUDAErrorCheckingEnabled()) + CHECK_CUDA_ERROR(); + m_tuner_property->end(); + } + +void mpcd::CellListGPU::computeNetProperties() + { { - m_force_compute = true; - return; + ArrayHandle d_cell_np(m_cell_np, access_location::device, access_mode::read); + ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::read); + ArrayHandle d_cell_energy(m_cell_energy, + access_location::device, + access_mode::read); + ArrayHandle d_cell_temp(m_cell_temp, access_location::device, access_mode::read); + + m_tmp_thermo.resize(getNCells()); + + ArrayHandle d_tmp_thermo(m_tmp_thermo, + access_location::device, + access_mode::overwrite); + + m_tuner_net->begin(); + mpcd::gpu::stage_net_cell_thermo(d_tmp_thermo.data, + d_cell_np.data, + d_cell_vel.data, + d_cell_energy.data, + d_cell_temp.data, + getNCells(), + m_flags[mpcd::detail::thermo_options::energy], + m_tuner_property->getParam()[0]); + if (m_exec_conf->isCUDAErrorCheckingEnabled()) + CHECK_CUDA_ERROR(); + 
m_tuner_net->end(); + + // use cub to reduce the properties on the gpu + void* d_tmp = NULL; + size_t tmp_bytes = 0; + mpcd::gpu::reduce_net_cell_thermo(m_reduced.getDeviceFlags(), + d_tmp, + tmp_bytes, + d_tmp_thermo.data, + m_tmp_thermo.size()); + if (m_exec_conf->isCUDAErrorCheckingEnabled()) + CHECK_CUDA_ERROR(); + + ScopedAllocation d_tmp_alloc(m_exec_conf->getCachedAllocator(), + (tmp_bytes > 0) ? tmp_bytes : 1); + d_tmp = (void*)d_tmp_alloc(); + + mpcd::gpu::reduce_net_cell_thermo(m_reduced.getDeviceFlags(), + d_tmp, + tmp_bytes, + d_tmp_thermo.data, + m_tmp_thermo.size()); + if (m_exec_conf->isCUDAErrorCheckingEnabled()) + CHECK_CUDA_ERROR(); } - // iterate through particles in cell list, and update their indexes using reverse mapping - ArrayHandle d_cell_list(m_cell_list, - access_location::device, - access_mode::readwrite); - ArrayHandle d_rorder(rorder, access_location::device, access_mode::read); - ArrayHandle d_cell_np(m_cell_np, access_location::device, access_mode::read); + // now copy the net properties back to host from the flags + unsigned int n_temp_cells = 0; + { + const mpcd::detail::cell_thermo_element reduced = m_reduced.readFlags(); - m_tuner_sort->begin(); - mpcd::gpu::cell_apply_sort(d_cell_list.data, - d_rorder.data, - d_cell_np.data, - m_cell_list_indexer, - m_mpcd_pdata->getN(), - m_tuner_sort->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_tuner_sort->end(); + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::overwrite); + h_net_properties.data[mpcd::detail::thermo_index::momentum_x] = reduced.momentum.x; + h_net_properties.data[mpcd::detail::thermo_index::momentum_y] = reduced.momentum.y; + h_net_properties.data[mpcd::detail::thermo_index::momentum_z] = reduced.momentum.z; + + h_net_properties.data[mpcd::detail::thermo_index::energy] = reduced.energy; + h_net_properties.data[mpcd::detail::thermo_index::temperature] = reduced.temperature; + + n_temp_cells = reduced.flag; + } + +#ifdef ENABLE_MPI + { + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::readwrite); + MPI_Allreduce(MPI_IN_PLACE, + h_net_properties.data, + mpcd::detail::thermo_index::num_quantities, + MPI_DOUBLE, + MPI_SUM, + m_exec_conf->getMPICommunicator()); + + MPI_Allreduce(MPI_IN_PLACE, + &n_temp_cells, + 1, + MPI_UNSIGNED, + MPI_SUM, + m_exec_conf->getMPICommunicator()); + } +#endif // ENABLE_MPI + + if (n_temp_cells > 0) + { + ArrayHandle h_net_properties(m_net_properties, + access_location::host, + access_mode::readwrite); + h_net_properties.data[mpcd::detail::thermo_index::temperature] /= (double)n_temp_cells; + } + m_needs_net_reduce = false; } #ifdef ENABLE_MPI diff --git a/hoomd/mpcd/CellListGPU.cu b/hoomd/mpcd/CellListGPU.cu index 40435b7946..c77580da2a 100644 --- a/hoomd/mpcd/CellListGPU.cu +++ b/hoomd/mpcd/CellListGPU.cu @@ -5,6 +5,7 @@ * \file mpcd/CellListGPU.cu * \brief Defines GPU functions and kernels used by mpcd::CellListGPU */ +#include #include "CellListGPU.cuh" @@ -19,12 +20,15 @@ namespace kernel //! Kernel to compute the MPCD cell list on the GPU /*! 
 * \param d_cell_np Array of number of particles per cell
- * \param d_cell_list 2D array of MPCD particles in each cell
+ * \param d_cell_vel Array of center of mass velocity + cell mass per cell
+ * \param d_cell_energy Array of per cell kinetic energy
 * \param d_conditions Conditions flags for error reporting
 * \param d_vel MPCD particle velocities
+ * \param mpcd_mass MPCD particle mass
 * \param d_embed_cell_ids Cell indexes of embedded particles
 * \param d_pos MPCD particle positions
 * \param d_pos_embed Particle positions
+ * \param d_vel_embed Particle velocities
 * \param d_embed_member_idx Indexes of embedded particles in \a d_pos_embed
 * \param periodic Flags if local simulation is periodic
 * \param origin_idx Global origin index for the local box
@@ -32,26 +36,29 @@ namespace kernel
 * \param global_box Global simulation box
 * \param n_global_cell Global dimensions of the cell list, including padding
 * \param global_cell_dim Global cell dimensions, no padding
- * \param cell_np_max Maximum number of particles per cell
 * \param cell_indexer 3D indexer for cell id
- * \param cell_list_indexer 2D indexer for particle position in cell
 * \param N_mpcd Number of MPCD particles
 * \param N_tot Total number of particle (MPCD + embedded)
+ * \param need_energy True if computing the energy
 *
 * \b Implementation
 * One thread is launched per particle. The particle is floored into a bin subject to a random grid
- * shift. The number of particles in that bin is atomically incremented. If the addition of the
- * particle will not overflow the allocated memory, the particle is written into that bin.
- * Otherwise, a flag is set to resize the cell list and recompute. The MPCD particle's cell id is
- * stashed into the velocity array.
+ * shift. The number of particles in that bin is atomically incremented, and the contribution of
+ * the particle's properties (mass, momentum, and optionally kinetic energy) is added to the
+ * running sums for that cell. The MPCD particle's cell id is stashed into the velocity array.
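+ *
+ * As a rough illustrative sketch (not the full kernel; the names mirror the arguments and
+ * locals used below), the per-particle accumulation amounts to:
+ *
+ * \code
+ * const unsigned int bin_idx = cell_indexer(bin.x, bin.y, bin.z);
+ * atomicInc(&d_cell_np[bin_idx], 0xffffffff);
+ * atomicAdd(&d_cell_vel[bin_idx].x, mass_i * vel_i.x);
+ * atomicAdd(&d_cell_vel[bin_idx].y, mass_i * vel_i.y);
+ * atomicAdd(&d_cell_vel[bin_idx].z, mass_i * vel_i.z);
+ * atomicAdd(&d_cell_vel[bin_idx].w, mass_i);
+ * if (need_energy)
+ *     atomicAdd(&d_cell_energy[bin_idx],
+ *               0.5 * mass_i * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z));
+ * \endcode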
*/ __global__ void compute_cell_list(unsigned int* d_cell_np, - unsigned int* d_cell_list, + double4* d_cell_vel, + double* d_cell_energy, uint3* d_conditions, Scalar4* d_vel, + double mpcd_mass, unsigned int* d_embed_cell_ids, const Scalar4* d_pos, const Scalar4* d_pos_embed, + const Scalar4* d_vel_embed, const unsigned int* d_embed_member_idx, const uchar3 periodic, const int3 origin_idx, @@ -59,11 +66,10 @@ __global__ void compute_cell_list(unsigned int* d_cell_np, const BoxDim global_box, const uint3 n_global_cell, const uint3 global_cell_dim, - const unsigned int cell_np_max, const Index3D cell_indexer, - const Index2D cell_list_indexer, const unsigned int N_mpcd, - const unsigned int N_tot) + const unsigned int N_tot, + const bool need_energy) { // one thread per particle unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -71,15 +77,22 @@ __global__ void compute_cell_list(unsigned int* d_cell_np, return; Scalar4 postype_i; + Scalar4 vel_mass_i; + double mass_i; if (idx < N_mpcd) { postype_i = d_pos[idx]; + vel_mass_i = d_vel[idx]; + mass_i = mpcd_mass; } else { postype_i = d_pos_embed[d_embed_member_idx[idx - N_mpcd]]; + vel_mass_i = d_vel_embed[d_embed_member_idx[idx - N_mpcd]]; + mass_i = vel_mass_i.w; } const Scalar3 pos_i = make_scalar3(postype_i.x, postype_i.y, postype_i.z); + const double3 vel_i = make_double3(vel_mass_i.x, vel_mass_i.y, vel_mass_i.z); if (isnan(pos_i.x) || isnan(pos_i.y) || isnan(pos_i.z)) { @@ -156,25 +169,147 @@ __global__ void compute_cell_list(unsigned int* d_cell_np, const unsigned int bin_idx = cell_indexer(bin.x, bin.y, bin.z); const unsigned int offset = atomicInc(&d_cell_np[bin_idx], 0xffffffff); - if (offset < cell_np_max) + + // stash the current particle bin into the velocity array + if (idx < N_mpcd) { - d_cell_list[cell_list_indexer(offset, bin_idx)] = idx; + d_vel[idx].w = __int_as_scalar(bin_idx); } else { - // overflow - atomicMax(&(*d_conditions).x, offset + 1); + d_embed_cell_ids[idx - N_mpcd] = bin_idx; } - // stash the current particle bin into the velocity array - if (idx < N_mpcd) + // compute the contribution of the particle to cell properties + double4& cell_vel = d_cell_vel[bin_idx]; + atomicAdd(&cell_vel.x, vel_i.x * mass_i); + atomicAdd(&cell_vel.y, vel_i.y * mass_i); + atomicAdd(&cell_vel.z, vel_i.z * mass_i); + atomicAdd(&cell_vel.w, mass_i); + + if (need_energy) { - d_vel[idx].w = __int_as_scalar(bin_idx); + double ke = 0.5 * mass_i * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z); + atomicAdd(&d_cell_energy[bin_idx], ke); + } + } + +/*! + * \param d_cell_np Array of number of particles per cell + * \param d_cell_vel Cell velocity to finish computing + * \param d_cell_energy Cell kinetic energy + * \param d_cell_temp Cell temperature to finish computing + * \param N_cells Number of cells + * \param N_dim Number of dimensions + * \param need_energy If true, compute the cell-level energy properties. + * + * \b Implementation details: + * Using one thread per cell, the cell properties are normalized from the + * additive contributions of the particles to their final values, e.g. the cell + * velocities are computed from the net momentum of the cell divided by the final + * mass. The temperature of each cell is calculated. 
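+ *
+ * As a sketch in the kernel's variable names (here \a momentum denotes the per-cell sum
+ * accumulated into \a d_cell_vel by compute_cell_list), the normalization is roughly:
+ *
+ * \code
+ * vel_cm.x = momentum.x / mass;      // similarly for y and z; only when mass > 0
+ * ke_cm = 0.5 * mass * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z);
+ * temp = 2. * (cell_energy - ke_cm) / (N_dim * (np - 1));   // only when np > 1
+ * \endcode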
+ */ +__global__ void finish_cell_properties(const unsigned int* d_cell_np, + double4* d_cell_vel, + const double* d_cell_energy, + double* d_cell_temp, + const unsigned int N_cells, + const unsigned int N_dim, + const bool need_energy) + { + // one thread per cell + unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= N_cells) + return; + + const double4 vel_cell = d_cell_vel[idx]; + double3 vel_cm = make_double3(vel_cell.x, vel_cell.y, vel_cell.z); + const double mass = vel_cell.w; + + // get center of mass velocity from momentum + if (mass > 0.) + { + // average velocity is only defined when there is some mass in the cell + vel_cm.x /= mass; + vel_cm.y /= mass; + vel_cm.z /= mass; + } + d_cell_vel[idx] = make_double4(vel_cm.x, vel_cm.y, vel_cm.z, mass); + + if (need_energy) + { + const double cell_energy = d_cell_energy[idx]; + double temp(0.0); + const unsigned int np = d_cell_np[idx]; + + if (np > 1) + { + const double ke_cm + = 0.5 * mass * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z); + temp = 2. * (cell_energy - ke_cm) / (N_dim * (np - 1)); + } + d_cell_temp[idx] = temp; + } + } + +/*! + * \param d_tmp_thermo Temporary cell packed thermo element + * \param d_cell_np Number of particles per cell + * \param d_cell_vel Cell velocity + * \param d_cell_energy Cell kinetic energy + * \param d_cell_temperature Cell temperature + * \param N_cells Number of cells + * \tparam need_energy If true, compute the cell-level energy properties. + * + * \b Implementation details: + * Using one thread per \a temporary cell, the cell properties are normalized + * in a way suitable for reduction of net properties, e.g. the cell velocities + * are converted to momentum. The temperature is set to the cell energy, and a + * flag is set to 1 or 0 to indicate whether this cell has an energy that should + * be used in averaging the total temperature. + */ +template +__global__ void stage_net_cell_thermo(mpcd::detail::cell_thermo_element* d_tmp_thermo, + const unsigned int* d_cell_np, + const double4* d_cell_vel, + const double* d_cell_energy, + const double* d_cell_temp, + const unsigned int N_cells) + { + // one thread per cell + unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= N_cells) + return; + + const double4 vel_cell = d_cell_vel[idx]; + const double mass = vel_cell.w; + + // add accumulated momentum to net momentum + mpcd::detail::cell_thermo_element thermo; + thermo.momentum = make_double3(vel_cell.x * mass, vel_cell.y * mass, vel_cell.z * mass); + + if (need_energy) + { + if (d_cell_np[idx] > 1) + { + thermo.temperature = d_cell_temp[idx]; + thermo.flag = 1; + } + else + { + thermo.temperature = 0.; + thermo.flag = 0; + } + thermo.energy = d_cell_energy[idx]; } else { - d_embed_cell_ids[idx - N_mpcd] = bin_idx; + thermo.energy = 0.; + thermo.temperature = 0.; + thermo.flag = 0; } + + d_tmp_thermo[idx] = thermo; } /*! 
@@ -217,48 +352,21 @@ __global__ void cell_check_migrate_embed(unsigned int* d_migrate_flag, atomicMax(d_migrate_flag, 1); } } - -__global__ void cell_apply_sort(unsigned int* d_cell_list, - const unsigned int* d_rorder, - const unsigned int* d_cell_np, - const Index2D cli, - const unsigned int N_mpcd, - const unsigned int N_cli) - { - // one thread per cell-list entry - const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= N_cli) - return; - - // convert the entry 1D index into a 2D index - const unsigned int cell = idx / cli.getW(); - const unsigned int offset = idx - (cell * cli.getW()); - - /* here comes some terrible execution divergence */ - // check if the cell is filled - const unsigned int np = d_cell_np[cell]; - if (offset < np) - { - // check if this is an MPCD particle - const unsigned int pid = d_cell_list[idx]; - if (pid < N_mpcd) - { - d_cell_list[idx] = d_rorder[pid]; - } - } - } } // end namespace kernel } // end namespace gpu } // end namespace mpcd /*! * \param d_cell_np Array of number of particles per cell - * \param d_cell_list 2D array of MPCD particles in each cell + * \param d_cell_vel Array of center of mass velocity + cell mass per cell + * \param d_cell_energy Array of per cell kinetic energy, temperature, and dof * \param d_conditions Conditions flags for error reporting * \param d_vel MPCD particle velocities + * \param mpcd_mass MPCD particle mass * \param d_embed_cell_ids Cell indexes of embedded particles * \param d_pos MPCD particle positions * \param d_pos_embed Particle positions + * \param d_vel_embed Particle velocities * \param d_embed_member_idx Indexes of embedded particles in \a d_pos_embed * \param periodic Flags if local simulation is periodic * \param origin_idx Global origin index for the local box @@ -266,22 +374,24 @@ __global__ void cell_apply_sort(unsigned int* d_cell_list, * \param global_box Global simulation box * \param n_global_cell Global dimensions of the cell list, including padding * \param global_cell_dim Global cell dimensions, no padding - * \param cell_np_max Maximum number of particles per cell * \param cell_indexer 3D indexer for cell id - * \param cell_list_indexer 2D indexer for particle position in cell * \param N_mpcd Number of MPCD particles * \param N_tot Total number of particle (MPCD + embedded) + * \param need_energy True if computing the energy * \param block_size Number of threads per block * * \returns cudaSuccess on completion, or an error on failure */ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, - unsigned int* d_cell_list, + double4* d_cell_vel, + double* d_cell_energy, uint3* d_conditions, Scalar4* d_vel, + double mpcd_mass, unsigned int* d_embed_cell_ids, const Scalar4* d_pos, const Scalar4* d_pos_embed, + const Scalar4* d_vel_embed, const unsigned int* d_embed_member_idx, const uchar3& periodic, const int3& origin_idx, @@ -289,11 +399,10 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, const BoxDim& global_box, const uint3& n_global_cell, const uint3& global_cell_dim, - const unsigned int cell_np_max, const Index3D& cell_indexer, - const Index2D& cell_list_indexer, const unsigned int N_mpcd, const unsigned int N_tot, + const bool need_energy, const unsigned int block_size) { // set the number of particles in each cell to zero @@ -301,6 +410,17 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, = cudaMemset(d_cell_np, 0, sizeof(unsigned int) * cell_indexer.getNumElements()); if (error != cudaSuccess) return error; + cudaError_t 
error_vel + = cudaMemset(d_cell_vel, 0, sizeof(double4) * cell_indexer.getNumElements()); + if (error_vel != cudaSuccess) + return error_vel; + if (need_energy) + { + cudaError_t error_energy + = cudaMemset(d_cell_energy, 0, sizeof(double) * cell_indexer.getNumElements()); + if (error_energy != cudaSuccess) + return error_energy; + } unsigned int max_block_size; cudaFuncAttributes attr; @@ -310,12 +430,15 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, unsigned int run_block_size = min(block_size, max_block_size); dim3 grid(N_tot / run_block_size + 1); mpcd::gpu::kernel::compute_cell_list<<>>(d_cell_np, - d_cell_list, + d_cell_vel, + d_cell_energy, d_conditions, d_vel, + mpcd_mass, d_embed_cell_ids, d_pos, d_pos_embed, + d_vel_embed, d_embed_member_idx, periodic, origin_idx, @@ -323,12 +446,144 @@ cudaError_t mpcd::gpu::compute_cell_list(unsigned int* d_cell_np, global_box, n_global_cell, global_cell_dim, - cell_np_max, cell_indexer, - cell_list_indexer, N_mpcd, - N_tot); + N_tot, + need_energy); + + return cudaSuccess; + } + +/*! + * \param d_cell_np Number of particles in cells + * \param d_cell_vel Cell velocity to reduce + * \param d_cell_energy Cell kinetic energy + * \param d_cell_temp Cell temperature to finish computing + * \param N_cells Number of total cells + * \param N_dim Number of dimensions + * \param need_energy If true, compute the cell-level energy properties + * \param block_size Number of threads per block + * + * \returns cudaSuccess on completion + * + * \sa mpcd::gpu::kernel::finish_cell_properties + */ +cudaError_t mpcd::gpu::finish_cell_properties(const unsigned int* d_cell_np, + double4* d_cell_vel, + const double* d_cell_energy, + double* d_cell_temp, + const unsigned int N_cells, + const unsigned int N_dim, + const bool need_energy, + const unsigned int block_size) + { + // set all temperatures to zero + if (need_energy) + { + cudaError_t error_temp = cudaMemset(d_cell_temp, 0, sizeof(double) * N_cells); + if (error_temp != cudaSuccess) + return error_temp; + } + + unsigned int max_block_size; + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::finish_cell_properties); + max_block_size = attr.maxThreadsPerBlock; + + unsigned int run_block_size = min(block_size, max_block_size); + dim3 grid(N_cells / run_block_size + 1); + mpcd::gpu::kernel::finish_cell_properties<<>>(d_cell_np, + d_cell_vel, + d_cell_energy, + d_cell_temp, + N_cells, + N_dim, + need_energy); + + return cudaSuccess; + } + +/*! 
+ * \param d_tmp_thermo Temporary cell packed thermo element + * \param d_cell_np Number of particles per cell + * \param d_cell_vel Cell velocity + * \param d_cell_energy Cell kinetic energy + * \param d_cell_temp Cell temperature + * \param N_cells Number of total cells + * \param need_energy If true, compute the cell-level energy properties + * \param block_size Number of threads per block + * + * \returns cudaSuccess on completion + * + * \sa mpcd::gpu::kernel::stage_net_cell_thermo + */ +cudaError_t mpcd::gpu::stage_net_cell_thermo(mpcd::detail::cell_thermo_element* d_tmp_thermo, + const unsigned int* d_cell_np, + const double4* d_cell_vel, + const double* d_cell_energy, + const double* d_cell_temp, + const unsigned int N_cells, + const bool need_energy, + const unsigned int block_size) + { + if (need_energy) + { + unsigned int max_block_size_energy; + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::stage_net_cell_thermo); + max_block_size_energy = attr.maxThreadsPerBlock; + + unsigned int run_block_size = min(block_size, max_block_size_energy); + dim3 grid(N_cells / run_block_size + 1); + mpcd::gpu::kernel::stage_net_cell_thermo<<>>(d_tmp_thermo, + d_cell_np, + d_cell_vel, + d_cell_energy, + d_cell_temp, + N_cells); + } + else + { + unsigned int max_block_size_noenergy; + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::stage_net_cell_thermo); + max_block_size_noenergy = attr.maxThreadsPerBlock; + unsigned int run_block_size = min(block_size, max_block_size_noenergy); + dim3 grid(N_cells / run_block_size + 1); + mpcd::gpu::kernel::stage_net_cell_thermo<<>>(d_tmp_thermo, + d_cell_np, + d_cell_vel, + d_cell_energy, + d_cell_temp, + N_cells); + } + return cudaSuccess; + } + +/*! + * \param d_reduced Cell thermo properties reduced across all cells (output on second call) + * \param d_tmp Temporary storage for reduction (output on first call) + * \param tmp_bytes Number of bytes allocated for temporary storage (output on first call) + * \param d_tmp_thermo Cell thermo properties to reduce + * \param N_cells The number of cells to reduce across + * + * \returns cudaSuccess on completion + * + * \b Implementation details: + * CUB DeviceReduce is used to perform the reduction. Hence, this function requires + * two calls to perform the reduction. The first call sizes the temporary storage, + * which is returned in \a d_tmp and \a tmp_bytes. The caller must then allocate + * the required bytes, and call the function a second time. This performs the + * reduction and returns the result in \a d_reduced. 
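+ *
+ * A minimal usage sketch (mirroring CellListGPU::computeNetProperties above; the device
+ * allocation of \a d_tmp between the two calls is only indicated by a comment):
+ *
+ * \code
+ * void* d_tmp = NULL;
+ * size_t tmp_bytes = 0;
+ * // first call: query the required temporary storage size into tmp_bytes
+ * mpcd::gpu::reduce_net_cell_thermo(d_reduced, d_tmp, tmp_bytes, d_tmp_thermo, N_cells);
+ * // ... allocate tmp_bytes of device memory and point d_tmp at it ...
+ * // second call: perform the reduction into d_reduced
+ * mpcd::gpu::reduce_net_cell_thermo(d_reduced, d_tmp, tmp_bytes, d_tmp_thermo, N_cells);
+ * \endcode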
+ */ +cudaError_t mpcd::gpu::reduce_net_cell_thermo(mpcd::detail::cell_thermo_element* d_reduced, + void* d_tmp, + size_t& tmp_bytes, + const mpcd::detail::cell_thermo_element* d_tmp_thermo, + const size_t N_cells) + { + cub::DeviceReduce::Sum(d_tmp, tmp_bytes, d_tmp_thermo, d_reduced, (unsigned int)N_cells); return cudaSuccess; } @@ -370,30 +625,4 @@ cudaError_t mpcd::gpu::cell_check_migrate_embed(unsigned int* d_migrate_flag, return cudaSuccess; } -cudaError_t mpcd::gpu::cell_apply_sort(unsigned int* d_cell_list, - const unsigned int* d_rorder, - const unsigned int* d_cell_np, - const Index2D& cli, - const unsigned int N_mpcd, - const unsigned int block_size) - { - unsigned int max_block_size; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::cell_apply_sort); - max_block_size = attr.maxThreadsPerBlock; - - const unsigned int N_cli = cli.getNumElements(); - - unsigned int run_block_size = min(block_size, max_block_size); - dim3 grid(N_cli / run_block_size + 1); - mpcd::gpu::kernel::cell_apply_sort<<>>(d_cell_list, - d_rorder, - d_cell_np, - cli, - N_mpcd, - N_cli); - - return cudaSuccess; - } - } // end namespace hoomd diff --git a/hoomd/mpcd/CellListGPU.cuh b/hoomd/mpcd/CellListGPU.cuh index e74440ba81..d7609ba62c 100644 --- a/hoomd/mpcd/CellListGPU.cuh +++ b/hoomd/mpcd/CellListGPU.cuh @@ -19,16 +19,49 @@ namespace hoomd { namespace mpcd { +namespace detail + { +#ifdef __HIPCC__ +#define HOSTDEVICE __host__ __device__ +#else +#define HOSTDEVICE +#endif +//! Custom cell thermo element for reductions on the gpu +struct cell_thermo_element + { + double3 momentum; //!< Momentum of the cell + double energy; //!< Energy of the cell + double temperature; //!< Temperature of the cell (0 if < 2 particles) + unsigned int flag; //!< Flag to be used to compute filled cells + + //! Addition operator for summed reduction + HOSTDEVICE cell_thermo_element operator+(const cell_thermo_element& other) const + { + cell_thermo_element sum; + sum.momentum.x = momentum.x + other.momentum.x; + sum.momentum.y = momentum.y + other.momentum.y; + sum.momentum.z = momentum.z + other.momentum.z; + sum.energy = energy + other.energy; + sum.temperature = temperature + other.temperature; + sum.flag = flag + other.flag; + + return sum; + } + }; + } // namespace detail namespace gpu { //! Kernel driver to compute mpcd cell list cudaError_t compute_cell_list(unsigned int* d_cell_np, - unsigned int* d_cell_list, + double4* d_cell_vel, + double* d_cell_energy, uint3* d_conditions, Scalar4* d_vel, + double mpcd_mass, unsigned int* d_embed_cell_ids, const Scalar4* d_pos, const Scalar4* d_pos_embed, + const Scalar4* d_vel_embed, const unsigned int* d_embed_member_idx, const uchar3& periodic, const int3& origin_idx, @@ -36,13 +69,39 @@ cudaError_t compute_cell_list(unsigned int* d_cell_np, const BoxDim& global_box, const uint3& n_global_cell, const uint3& global_cell_dim, - const unsigned int cell_np_max, const Index3D& cell_indexer, - const Index2D& cell_list_indexer, const unsigned int N_mpcd, const unsigned int N_tot, + const bool need_energy, const unsigned int block_size); +//! Kernel driver to finalize cell property calculation +cudaError_t finish_cell_properties(const unsigned int* d_cell_np, + double4* d_cell_vel, + const double* d_cell_energy, + double* d_cell_temp, + const unsigned int N_cells, + const unsigned int N_dim, + const bool need_energy, + const unsigned int block_size); + +//! 
Kernel driver to normalize center of mass velocity and compute net properties +cudaError_t stage_net_cell_thermo(mpcd::detail::cell_thermo_element* d_tmp_thermo, + const unsigned int* d_cell_np, + const double4* d_cell_vel, + const double* d_cell_energy, + const double* d_cell_temp, + const unsigned int N_cells, + const bool need_energy, + const unsigned int block_size); + +//! Wrapper to cub device reduce for cell thermo properties +cudaError_t reduce_net_cell_thermo(mpcd::detail::cell_thermo_element* d_reduced, + void* d_tmp, + size_t& tmp_bytes, + const mpcd::detail::cell_thermo_element* d_tmp_thermo, + const size_t N_cells); + //! Kernel driver to check if any embedded particles require migration cudaError_t cell_check_migrate_embed(unsigned int* d_migrate_flag, const Scalar4* d_pos, @@ -52,14 +111,6 @@ cudaError_t cell_check_migrate_embed(unsigned int* d_migrate_flag, const unsigned int N, const unsigned int block_size); -//! Kernel drive to apply sorted order to MPCD particles in cell list -cudaError_t cell_apply_sort(unsigned int* d_cell_list, - const unsigned int* d_rorder, - const unsigned int* d_cell_np, - const Index2D& cli, - const unsigned int N_mpcd, - const unsigned int block_size); - } // end namespace gpu } // end namespace mpcd } // end namespace hoomd diff --git a/hoomd/mpcd/CellListGPU.h b/hoomd/mpcd/CellListGPU.h index f9f78f8779..a1e3e3d494 100644 --- a/hoomd/mpcd/CellListGPU.h +++ b/hoomd/mpcd/CellListGPU.h @@ -14,6 +14,7 @@ #endif #include "CellList.h" +#include "CellListGPU.cuh" #include "hoomd/Autotuner.h" namespace hoomd @@ -36,10 +37,11 @@ class PYBIND11_EXPORT CellListGPU : public mpcd::CellList //! Compute the cell list of particles on the GPU void buildCellList() override; - //! Callback to sort cell list on the GPU when particle data is sorted - virtual void sort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder); + //! Do final cell property calculation + void finishComputeProperties() override; + + //! Compute the net properties of all the cells + void computeNetProperties() override; #ifdef ENABLE_MPI //! Determine if embedded particles require migration on the gpu @@ -48,11 +50,19 @@ class PYBIND11_EXPORT CellListGPU : public mpcd::CellList #endif // ENABLE_MPI private: + GPUVector + m_tmp_thermo; //!< Temporary array for holding cell data + GPUFlags m_reduced; //!< Flags to hold reduced sum + /// Autotuner for the cell list calculation. std::shared_ptr> m_tuner_cell; - /// Autotuner for sorting the cell list. - std::shared_ptr> m_tuner_sort; + /// Autotuner for finishing cell property calculation. + std::shared_ptr> m_tuner_property; + + /// Autotuner for the net property calculation. + std::shared_ptr> m_tuner_net; + #ifdef ENABLE_MPI /// Autotuner for checking embedded migration. std::shared_ptr> m_tuner_embed_migrate; diff --git a/hoomd/mpcd/CellThermoCompute.cc b/hoomd/mpcd/CellThermoCompute.cc deleted file mode 100644 index c77285e3f7..0000000000 --- a/hoomd/mpcd/CellThermoCompute.cc +++ /dev/null @@ -1,617 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellThermoCompute.cc - * \brief Definition of mpcd::CellThermoCompute - */ - -#include "CellThermoCompute.h" -#include "ReductionOperators.h" - -namespace hoomd - { -/*! 
- * \param sysdef System definition - */ -mpcd::CellThermoCompute::CellThermoCompute(std::shared_ptr sysdef, - std::shared_ptr cl) - : Compute(sysdef), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_cl(cl), - m_needs_net_reduce(true), m_cell_vel(m_exec_conf), m_cell_energy(m_exec_conf), - m_ncells_alloc(0) - { - m_exec_conf->msg->notice(5) << "Constructing MPCD CellThermoCompute" << std::endl; - - GPUArray net_properties(mpcd::detail::thermo_index::num_quantities, m_exec_conf); - m_net_properties.swap(net_properties); - -#ifdef ENABLE_MPI - if (m_exec_conf->getNRanks() > 1) - { - m_use_mpi = true; - m_vel_comm = std::make_shared(m_sysdef, m_cl); - m_energy_comm = std::make_shared(m_sysdef, m_cl); - } - else - { - m_use_mpi = false; - } -#endif // ENABLE_MPI - - // the thermo properties need to be recomputed if the virtual particles change - m_mpcd_pdata->getNumVirtualSignal() - .connect(this); - } - -mpcd::CellThermoCompute::~CellThermoCompute() - { - m_exec_conf->msg->notice(5) << "Destroying MPCD CellThermoCompute" << std::endl; - m_mpcd_pdata->getNumVirtualSignal() - .disconnect(this); - } - -void mpcd::CellThermoCompute::compute(uint64_t timestep) - { - Compute::compute(timestep); - // check if computation should proceed, and always mark the calculation as occurring at this - // timestep, even if forced - if (!shouldCompute(timestep)) - return; - m_last_computed = timestep; - - // cell list needs to be up to date first - m_cl->compute(timestep); - - // ensure optional flags are up to date - updateFlags(); - - const unsigned int ncells = m_cl->getNCells(); - if (ncells != m_ncells_alloc) - { - reallocate(ncells); - } - - computeCellProperties(timestep); - m_needs_net_reduce = true; - } - -void mpcd::CellThermoCompute::startAutotuning() - { - Compute::startAutotuning(); -#ifdef ENABLE_MPI - if (m_vel_comm) - m_vel_comm->startAutotuning(); - if (m_energy_comm) - m_energy_comm->startAutotuning(); -#endif // ENABLE_MPI - } - -bool mpcd::CellThermoCompute::isAutotuningComplete() - { - bool result = Compute::isAutotuningComplete(); -#ifdef ENABLE_MPI - if (m_vel_comm) - result = result && m_vel_comm->isAutotuningComplete(); - if (m_energy_comm) - result = result && m_energy_comm->isAutotuningComplete(); -#endif // ENABLE_MPI - return result; - } - -void mpcd::CellThermoCompute::computeCellProperties(uint64_t timestep) - { -/* - * In MPI simulations, begin by calculating the velocities and energies of - * cells that lie along the boundaries. These values will then be communicated - * while calculations proceed on the inner cells. - */ -#ifdef ENABLE_MPI - if (m_use_mpi) - { - beginOuterCellProperties(); - - m_vel_comm->begin(m_cell_vel, mpcd::detail::CellVelocityPackOp()); - if (m_flags[mpcd::detail::thermo_options::energy]) - m_energy_comm->begin(m_cell_energy, mpcd::detail::CellEnergyPackOp()); - } -#endif // ENABLE_MPI - - /* - * While communication is occurring on the outer cells, do the full calculation - * on the inner cells. In non-MPI simulations, only this part happens. - */ - calcInnerCellProperties(); - - /* - * Execute any additional callbacks that can be overlapped with outer communication. - */ - if (!m_callbacks.empty()) - m_callbacks.emit(timestep); - -/* - * In MPI simulations, we need to finalize the communication on the outer cells - * and normalize the communicated data. 
- */ -#ifdef ENABLE_MPI - if (m_use_mpi) - { - if (m_flags[mpcd::detail::thermo_options::energy]) - m_energy_comm->finalize(m_cell_energy, mpcd::detail::CellEnergyPackOp()); - m_vel_comm->finalize(m_cell_vel, mpcd::detail::CellVelocityPackOp()); - - finishOuterCellProperties(); - } -#endif // ENABLE_MPI - } - -namespace mpcd - { -namespace detail - { -//! Sums properties of an MPCD cell on the CPU -/*! - * This lightweight class is used in both beginOuterCellProperties() and - * calcInnerCellProperties(). The code has been consolidated into one place - * here to avoid some duplication. - */ -struct CellPropertySum - { - //! Constructor - /*! - * \param cell_list_ Cell list - * \param cell_np_ Number of particles per cell - * \param cli_ Cell list indexer - * \param vel_ MPCD particle velocities - * \param mass_ MPCD mass - * \param embed_vel_ Embedded particle velocities - * \param embed_idx_ Embedded particle indexes - * \param N_mpcd_ Number of MPCD particles - */ - CellPropertySum(const unsigned int* cell_list_, - const unsigned int* cell_np_, - const Index2D& cli_, - const Scalar4* vel_, - const Scalar mass_, - const Scalar4* embed_vel_, - const unsigned int* embed_idx_, - const unsigned int N_mpcd_) - : cell_list(cell_list_), cell_np(cell_np_), cli(cli_), vel(vel_), mass(mass_), - embed_vel(embed_vel_), embed_idx(embed_idx_), N_mpcd(N_mpcd_) - { - } - - //! Computes the total momentum, kinetic energy, and number of particles in a cell - /*! - * \param momentum Cell momentum (output) - * \param ke Cell kinetic energy (output) - * \param np Number of particles in cell (output) - * \param cell Index of cell to evaluate - * \param energy If true, then the kinetic energy is evaluated into \a ke - */ - inline void compute(double4& momentum, - double& ke, - unsigned int& np, - const unsigned int cell, - const bool energy) - { - momentum = make_double4(0.0, 0.0, 0.0, 0.0); - ke = 0.0; - np = cell_np[cell]; - - for (unsigned int offset = 0; offset < np; ++offset) - { - // Load particle data - const unsigned int cur_p = cell_list[cli(offset, cell)]; - double3 vel_i; - double mass_i; - if (cur_p < N_mpcd) - { - Scalar4 vel_cell = vel[cur_p]; - vel_i = make_double3(vel_cell.x, vel_cell.y, vel_cell.z); - mass_i = mass; - } - else - { - Scalar4 vel_m = embed_vel[embed_idx[cur_p - N_mpcd]]; - vel_i = make_double3(vel_m.x, vel_m.y, vel_m.z); - mass_i = vel_m.w; - } - - // add momentum - momentum.x += mass_i * vel_i.x; - momentum.y += mass_i * vel_i.y; - momentum.z += mass_i * vel_i.z; - momentum.w += mass_i; - - // also compute ke of the particle - if (energy) - ke += 0.5 * mass_i * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z); - } - } - - const unsigned int* cell_list; //!< Cell list - const unsigned int* cell_np; //!< Number of particles per cell - const Index2D cli; //!< Cell list indexer - - const Scalar4* vel; //!< MPCD particle velocities - const Scalar mass; //!< MPCD particle mass - const Scalar4* embed_vel; //!< Embedded particle velocities - const unsigned int* embed_idx; //!< Embedded particle indexes - const unsigned int N_mpcd; //!< Number of MPCD particles - }; - } // end namespace detail - } // end namespace mpcd - -#ifdef ENABLE_MPI -void mpcd::CellThermoCompute::beginOuterCellProperties() - { - // Cell list - ArrayHandle h_cell_list(m_cl->getCellList(), - access_location::host, - access_mode::read); - ArrayHandle h_cell_np(m_cl->getCellSizeArray(), - access_location::host, - access_mode::read); - const Index2D& cli = m_cl->getCellListIndexer(); - - // MPCD particle 
data - ArrayHandle h_vel(m_mpcd_pdata->getVelocities(), - access_location::host, - access_mode::read); - const Scalar mpcd_mass = m_mpcd_pdata->getMass(); - const unsigned int N_mpcd = m_mpcd_pdata->getN() + m_mpcd_pdata->getNVirtual(); - - // Embedded particle data - std::unique_ptr> h_embed_vel; - std::unique_ptr> h_embed_member_idx; - if (m_cl->getEmbeddedGroup()) - { - h_embed_vel.reset(new ArrayHandle(m_pdata->getVelocities(), - access_location::host, - access_mode::read)); - h_embed_member_idx.reset( - new ArrayHandle(m_cl->getEmbeddedGroup()->getIndexArray(), - access_location::host, - access_mode::read)); - } - - // Cell properties - ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::overwrite); - ArrayHandle h_cell_energy(m_cell_energy, - access_location::host, - access_mode::overwrite); - ArrayHandle h_cells(m_vel_comm->getCells(), - access_location::host, - access_mode::read); - mpcd::detail::CellPropertySum summer(h_cell_list.data, - h_cell_np.data, - cli, - h_vel.data, - mpcd_mass, - (m_cl->getEmbeddedGroup()) ? h_embed_vel->data : NULL, - (m_cl->getEmbeddedGroup()) ? h_embed_member_idx->data - : NULL, - N_mpcd); - - // Loop over all outer cells and compute total momentum, mass, energy - const bool need_energy = m_flags[mpcd::detail::thermo_options::energy]; - for (unsigned int idx = 0; idx < m_vel_comm->getNCells(); ++idx) - { - const unsigned int cur_cell = h_cells.data[idx]; - - // compute the cell properties - double4 momentum; - double ke(0.0); - unsigned int np(0); - summer.compute(momentum, ke, np, cur_cell, need_energy); - - h_cell_vel.data[cur_cell] = make_double4(momentum.x, momentum.y, momentum.z, momentum.w); - if (need_energy) - h_cell_energy.data[cur_cell] = make_double3(ke, 0.0, __int_as_double(np)); - } - } - -void mpcd::CellThermoCompute::finishOuterCellProperties() - { - ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::readwrite); - ArrayHandle h_cell_energy(m_cell_energy, - access_location::host, - access_mode::readwrite); - ArrayHandle h_cells(m_vel_comm->getCells(), - access_location::host, - access_mode::read); - - // Loop over all outer cells and normalize the summed quantities - const bool need_energy = m_flags[mpcd::detail::thermo_options::energy]; - for (unsigned int idx = 0; idx < m_vel_comm->getNCells(); ++idx) - { - const unsigned int cur_cell = h_cells.data[idx]; - - // average cell properties if the cell has mass - const double4 cell_vel = h_cell_vel.data[cur_cell]; - double3 vel_cm = make_double3(cell_vel.x, cell_vel.y, cell_vel.z); - const double mass = cell_vel.w; - - if (mass > 0.) - { - // average velocity is only defined when there is some mass in the cell - vel_cm.x /= mass; - vel_cm.y /= mass; - vel_cm.z /= mass; - } - h_cell_vel.data[cur_cell] = make_double4(vel_cm.x, vel_cm.y, vel_cm.z, mass); - - if (need_energy) - { - const double3 cell_energy = h_cell_energy.data[cur_cell]; - const double ke = cell_energy.x; - double temp(0.0); - const unsigned int np = __double_as_int(cell_energy.z); - // temperature is only defined for 2 or more particles - if (np > 1) - { - const double ke_cm - = 0.5 * mass - * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z); - temp = 2. 
* (ke - ke_cm) / (m_sysdef->getNDimensions() * (np - 1)); - } - h_cell_energy.data[cur_cell] = make_double3(ke, temp, __int_as_double(np)); - } - } - } -#endif // ENABLE_MPI - -void mpcd::CellThermoCompute::calcInnerCellProperties() - { - // Cell list - const Index2D& cli = m_cl->getCellListIndexer(); - ArrayHandle h_cell_list(m_cl->getCellList(), - access_location::host, - access_mode::read); - ArrayHandle h_cell_np(m_cl->getCellSizeArray(), - access_location::host, - access_mode::read); - - // MPCD particle data - const unsigned int N_mpcd = m_mpcd_pdata->getN() + m_mpcd_pdata->getNVirtual(); - const Scalar mpcd_mass = m_mpcd_pdata->getMass(); - ArrayHandle h_vel(m_mpcd_pdata->getVelocities(), - access_location::host, - access_mode::read); - - // Embedded particle data - std::unique_ptr> h_embed_vel; - std::unique_ptr> h_embed_member_idx; - if (m_cl->getEmbeddedGroup()) - { - h_embed_vel.reset(new ArrayHandle(m_pdata->getVelocities(), - access_location::host, - access_mode::read)); - h_embed_member_idx.reset( - new ArrayHandle(m_cl->getEmbeddedGroup()->getIndexArray(), - access_location::host, - access_mode::read)); - } - - // Cell properties - ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::readwrite); - ArrayHandle h_cell_energy(m_cell_energy, - access_location::host, - access_mode::readwrite); - mpcd::detail::CellPropertySum summer(h_cell_list.data, - h_cell_np.data, - cli, - h_vel.data, - mpcd_mass, - (m_cl->getEmbeddedGroup()) ? h_embed_vel->data : NULL, - (m_cl->getEmbeddedGroup()) ? h_embed_member_idx->data - : NULL, - N_mpcd); - - // determine which cells are inner - uint3 lo, hi; - const Index3D& ci = m_cl->getCellIndexer(); -#ifdef ENABLE_MPI - if (m_use_mpi) - { - auto num_comm_cells = m_cl->getNComm(); - lo = make_uint3(num_comm_cells[static_cast(mpcd::detail::face::west)], - num_comm_cells[static_cast(mpcd::detail::face::south)], - num_comm_cells[static_cast(mpcd::detail::face::down)]); - hi = make_uint3( - ci.getW() - num_comm_cells[static_cast(mpcd::detail::face::east)], - ci.getH() - num_comm_cells[static_cast(mpcd::detail::face::north)], - ci.getD() - num_comm_cells[static_cast(mpcd::detail::face::up)]); - } - else -#endif // ENABLE_MPI - { - lo = make_uint3(0, 0, 0); - hi = m_cl->getDim(); - } - - // iterate over all of the inner cells and compute average velocity, energy, temperature - const bool need_energy = m_flags[mpcd::detail::thermo_options::energy]; - for (unsigned int k = lo.z; k < hi.z; ++k) - { - for (unsigned int j = lo.y; j < hi.y; ++j) - { - for (unsigned int i = lo.x; i < hi.x; ++i) - { - const unsigned int cur_cell = ci(i, j, k); - - // compute the cell properties - double4 momentum; - double ke(0.0); - unsigned int np(0); - summer.compute(momentum, ke, np, cur_cell, need_energy); - - const double mass = momentum.w; - double3 vel_cm = make_double3(0.0, 0.0, 0.0); - if (mass > 0.) - { - vel_cm.x = momentum.x / mass; - vel_cm.y = momentum.y / mass; - vel_cm.z = momentum.z / mass; - } - - h_cell_vel.data[cur_cell] = make_double4(vel_cm.x, vel_cm.y, vel_cm.z, mass); - if (need_energy) - { - double temp(0.0); - if (np > 1) - { - const double ke_cm - = 0.5 * mass - * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z); - temp = 2. 
* (ke - ke_cm) / (m_sysdef->getNDimensions() * (np - 1)); - } - h_cell_energy.data[cur_cell] = make_double3(ke, temp, __int_as_double(np)); - } - } // i - } // j - } // k - } - -void mpcd::CellThermoCompute::computeNetProperties() - { - // first reduce the properties on the rank - unsigned int n_temp_cells = 0; - { - const Index3D& ci = m_cl->getCellIndexer(); - uint3 upper = make_uint3(ci.getW(), ci.getH(), ci.getD()); -#ifdef ENABLE_MPI - // in MPI, remove duplicate cells along direction of communication - if (m_use_mpi) - { - auto num_comm = m_cl->getNComm(); - upper.x -= num_comm[static_cast(mpcd::detail::face::east)]; - upper.y -= num_comm[static_cast(mpcd::detail::face::north)]; - upper.z -= num_comm[static_cast(mpcd::detail::face::up)]; - } -#endif // ENABLE_MPI - - ArrayHandle h_cell_vel(m_cell_vel, access_location::host, access_mode::read); - ArrayHandle h_cell_energy(m_cell_energy, access_location::host, access_mode::read); - - const bool need_energy = m_flags[mpcd::detail::thermo_options::energy]; - - double3 net_momentum = make_double3(0, 0, 0); - double energy(0.0), temp(0.0); - for (unsigned int k = 0; k < upper.z; ++k) - { - for (unsigned int j = 0; j < upper.y; ++j) - { - for (unsigned int i = 0; i < upper.x; ++i) - { - const unsigned int idx = ci(i, j, k); - - const double4 cell_vel_mass = h_cell_vel.data[idx]; - const double3 cell_vel - = make_double3(cell_vel_mass.x, cell_vel_mass.y, cell_vel_mass.z); - const double cell_mass = cell_vel_mass.w; - - net_momentum.x += cell_mass * cell_vel.x; - net_momentum.y += cell_mass * cell_vel.y; - net_momentum.z += cell_mass * cell_vel.z; - - if (need_energy) - { - const double3 cell_energy = h_cell_energy.data[idx]; - energy += cell_energy.x; - - if (__double_as_int(cell_energy.z) > 1) - { - temp += cell_energy.y; - ++n_temp_cells; - } - } - } - } - } - - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::overwrite); - h_net_properties.data[mpcd::detail::thermo_index::momentum_x] = net_momentum.x; - h_net_properties.data[mpcd::detail::thermo_index::momentum_y] = net_momentum.y; - h_net_properties.data[mpcd::detail::thermo_index::momentum_z] = net_momentum.z; - - h_net_properties.data[mpcd::detail::thermo_index::energy] = energy; - h_net_properties.data[mpcd::detail::thermo_index::temperature] = temp; - } - -#ifdef ENABLE_MPI - if (m_use_mpi) - { - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::readwrite); - MPI_Allreduce(MPI_IN_PLACE, - h_net_properties.data, - mpcd::detail::thermo_index::num_quantities, - MPI_DOUBLE, - MPI_SUM, - m_exec_conf->getMPICommunicator()); - - MPI_Allreduce(MPI_IN_PLACE, - &n_temp_cells, - 1, - MPI_UNSIGNED, - MPI_SUM, - m_exec_conf->getMPICommunicator()); - } -#endif // ENABLE_MPI - - if (n_temp_cells > 0) - { - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::readwrite); - h_net_properties.data[mpcd::detail::thermo_index::temperature] /= (double)n_temp_cells; - } - - m_needs_net_reduce = false; - } - -void mpcd::CellThermoCompute::updateFlags() - { - mpcd::detail::ThermoFlags flags; - - if (!m_flag_signal.empty()) - { - m_flag_signal.emit_accumulate([&](mpcd::detail::ThermoFlags f) { flags |= f; }); - } - - m_flags = flags; - } - -/*! 
- * \param ncells Number of cells - */ -void mpcd::CellThermoCompute::reallocate(unsigned int ncells) - { - // Grow arrays to match the size if necessary - m_cell_vel.resize(ncells); - m_cell_energy.resize(ncells); - - m_ncells_alloc = ncells; - } - -namespace mpcd - { -namespace detail - { -/*! - * \param m Python module to export to - */ -void export_CellThermoCompute(pybind11::module& m) - { - pybind11::class_>( - m, - "CellThermoCompute") - .def(pybind11::init, std::shared_ptr>()); - } - } // namespace detail - } // namespace mpcd - } // end namespace hoomd diff --git a/hoomd/mpcd/CellThermoCompute.h b/hoomd/mpcd/CellThermoCompute.h deleted file mode 100644 index 0f7c60923d..0000000000 --- a/hoomd/mpcd/CellThermoCompute.h +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellThermoCompute.h - * \brief Declaration of mpcd::CellThermoCompute - */ - -#ifndef MPCD_CELL_THERMO_COMPUTE_H_ -#define MPCD_CELL_THERMO_COMPUTE_H_ - -#ifdef __HIPCC__ -#error This header cannot be compiled by nvcc -#endif - -#include "CellList.h" -#include "CellThermoTypes.h" -#ifdef ENABLE_MPI -#include "CellCommunicator.h" -#endif // ENABLE_MPI - -#include "hoomd/Compute.h" -#include "hoomd/SystemDefinition.h" -#include "hoomd/extern/nano-signal-slot/nano_signal_slot.hpp" -#include - -namespace hoomd - { -namespace mpcd - { -//! Computes the cell (thermodynamic) properties -class PYBIND11_EXPORT CellThermoCompute : public Compute - { - public: - //! Constructor - CellThermoCompute(std::shared_ptr sysdef, std::shared_ptr cl); - - //! Destructor - virtual ~CellThermoCompute(); - - //! Compute the cell thermodynamic properties - void compute(uint64_t timestep) override; - - //! Start autotuning kernel launch parameters - void startAutotuning() override; - - //! Check if kernel autotuning is complete - bool isAutotuningComplete() override; - - //! Get the cell indexer for the attached cell list - const Index3D& getCellIndexer() const - { - return m_cl->getCellIndexer(); - } - - //! Get the cell velocities from the last call to compute - const GPUArray& getCellVelocities() const - { - return m_cell_vel; - } - - //! Get the cell energies from the last call to compute - const GPUArray& getCellEnergies() const - { - return m_cell_energy; - } - - //! Get the net momentum of the particles from the last call to compute - Scalar3 getNetMomentum() - { - if (m_needs_net_reduce) - computeNetProperties(); - - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::read); - const Scalar3 net_momentum - = make_scalar3(h_net_properties.data[mpcd::detail::thermo_index::momentum_x], - h_net_properties.data[mpcd::detail::thermo_index::momentum_y], - h_net_properties.data[mpcd::detail::thermo_index::momentum_z]); - return net_momentum; - } - - //! Get the net energy of the particles from the last call to compute - Scalar getNetEnergy() - { - if (!m_flags[mpcd::detail::thermo_options::energy]) - { - m_exec_conf->msg->error() - << "Energy requested from CellThermoCompute, but was not computed." << std::endl; - throw std::runtime_error("Net cell energy not available"); - } - - if (m_needs_net_reduce) - computeNetProperties(); - - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::read); - return h_net_properties.data[mpcd::detail::thermo_index::energy]; - } - - //! 
Get the average cell temperature from the last call to compute - Scalar getTemperature() - { - if (!m_flags[mpcd::detail::thermo_options::energy]) - { - m_exec_conf->msg->error() - << "Temperature requested from CellThermoCompute, but was not computed." - << std::endl; - throw std::runtime_error("Net cell temperature not available"); - } - if (m_needs_net_reduce) - computeNetProperties(); - - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::read); - return h_net_properties.data[mpcd::detail::thermo_index::temperature]; - } - - //! Get the signal for requested thermo flags - /*! - * \returns A signal that subscribers can attach a callback to in order - * to request certain data. - * - * For performance reasons, the CellThermoCompute should be able to - * supply many related cell-level quantities from a single kernel launch. - * However, sometimes these quantities are not needed, and it is better - * to skip calculating them. Subscribing classes can optionally request - * some of these quantities via a callback return mpcd::detail::ThermoFlags - * with the appropriate bits set. - */ - Nano::Signal& getFlagsSignal() - { - return m_flag_signal; - } - - //! Get the signal to attach a callback function - Nano::Signal& getCallbackSignal() - { - return m_callbacks; - } - - protected: - //! Compute the cell properties - void computeCellProperties(uint64_t timestep); - -#ifdef ENABLE_MPI - //! Begin the calculation of outer cell properties - virtual void beginOuterCellProperties(); - - //! Finish the calculation of outer cell properties - virtual void finishOuterCellProperties(); -#endif // ENABLE_MPI - - //! Calculate the inner cell properties - virtual void calcInnerCellProperties(); - - //! Compute the net properties from the cell properties - virtual void computeNetProperties(); - - std::shared_ptr m_mpcd_pdata; //!< MPCD particle data - std::shared_ptr m_cl; //!< MPCD cell list -#ifdef ENABLE_MPI - bool m_use_mpi; //!< Flag if communication is required - std::shared_ptr m_vel_comm; //!< Cell velocity communicator - std::shared_ptr m_energy_comm; //!< Cell energy communicator -#endif // ENABLE_MPI - - bool m_needs_net_reduce; //!< Flag if a net reduction is necessary - GPUArray m_net_properties; //!< Scalar properties of the system - - GPUVector m_cell_vel; //!< Average velocity of a cell + cell mass - GPUVector m_cell_energy; //!< Kinetic energy, unscaled temperature, dof in each cell - unsigned int m_ncells_alloc; //!< Number of cells allocated for - - Nano::Signal m_flag_signal; //!< Signal for requested flags - mpcd::detail::ThermoFlags m_flags; //!< Requested thermo flags - //! Updates the requested optional flags - void updateFlags(); - - Nano::Signal m_callbacks; //!< Signal for callback functions - - private: - //! Allocate memory per cell - void reallocate(unsigned int ncells); - - //! Slot for the number of virtual particles changing - /*! - * All thermo properties should be recomputed if the number of virtual particles changes. - * The cases where the thermo is first used without virtual particles - * followed by with virtual particles are small, and so it is easiest to just recompute. 
- */ - void slotNumVirtual() - { - m_force_compute = true; - } - }; - } // end namespace mpcd - } // end namespace hoomd -#endif // #define MPCD_CELL_THERMO_COMPUTE_H_ diff --git a/hoomd/mpcd/CellThermoComputeGPU.cc b/hoomd/mpcd/CellThermoComputeGPU.cc deleted file mode 100644 index bd75883cac..0000000000 --- a/hoomd/mpcd/CellThermoComputeGPU.cc +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellThermoComputeGPU.cc - * \brief Definition of mpcd::CellThermoComputeGPU - */ - -#include "CellThermoComputeGPU.h" -#include "ReductionOperators.h" - -namespace hoomd - { -/*! - * \param sysdef System definition - * \param cl MPCD cell list - */ -mpcd::CellThermoComputeGPU::CellThermoComputeGPU(std::shared_ptr sysdef, - std::shared_ptr cl) - : mpcd::CellThermoCompute(sysdef, cl), m_tmp_thermo(m_exec_conf), m_reduced(m_exec_conf) - { - m_begin_tuner.reset(new Autotuner<2>({AutotunerBase::makeBlockSizeRange(m_exec_conf), - AutotunerBase::getTppListPow2(this->m_exec_conf)}, - m_exec_conf, - "mpcd_cell_thermo_begin")); - m_end_tuner.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_thermo_end")); - m_inner_tuner.reset(new Autotuner<2>({AutotunerBase::makeBlockSizeRange(m_exec_conf), - AutotunerBase::getTppListPow2(this->m_exec_conf)}, - m_exec_conf, - "mpcd_cell_thermo_inner")); - m_stage_tuner.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_cell_thermo_stage")); - - m_autotuners.insert(m_autotuners.end(), - {m_begin_tuner, m_end_tuner, m_inner_tuner, m_stage_tuner}); - } - -mpcd::CellThermoComputeGPU::~CellThermoComputeGPU() { } - -#ifdef ENABLE_MPI -void mpcd::CellThermoComputeGPU::beginOuterCellProperties() - { - ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::overwrite); - ArrayHandle d_cell_energy(m_cell_energy, - access_location::device, - access_mode::overwrite); - - ArrayHandle d_cells(m_vel_comm->getCells(), - access_location::device, - access_mode::read); - ArrayHandle d_cell_np(m_cl->getCellSizeArray(), - access_location::device, - access_mode::read); - ArrayHandle d_cell_list(m_cl->getCellList(), - access_location::device, - access_mode::read); - - ArrayHandle d_vel(m_mpcd_pdata->getVelocities(), - access_location::device, - access_mode::read); - - if (m_cl->getEmbeddedGroup()) - { - // Embedded particle data - ArrayHandle d_embed_vel(m_pdata->getVelocities(), - access_location::device, - access_mode::read); - ArrayHandle d_embed_cell(m_cl->getEmbeddedGroup()->getIndexArray(), - access_location::device, - access_mode::read); - - mpcd::detail::thermo_args_t args(d_cell_vel.data, - d_cell_energy.data, - d_cell_np.data, - d_cell_list.data, - m_cl->getCellListIndexer(), - d_vel.data, - m_mpcd_pdata->getN() + m_mpcd_pdata->getNVirtual(), - m_mpcd_pdata->getMass(), - d_embed_vel.data, - d_embed_cell.data, - m_flags[mpcd::detail::thermo_options::energy]); - - m_begin_tuner->begin(); - auto param = m_begin_tuner->getParam(); - const unsigned int block_size = param[0]; - const unsigned int tpp = param[1]; - gpu::begin_cell_thermo(args, d_cells.data, m_vel_comm->getNCells(), block_size, tpp); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_begin_tuner->end(); - } - else - { - mpcd::detail::thermo_args_t args(d_cell_vel.data, - d_cell_energy.data, - d_cell_np.data, - d_cell_list.data, - m_cl->getCellListIndexer(), 
- d_vel.data, - m_mpcd_pdata->getN() + m_mpcd_pdata->getNVirtual(), - m_mpcd_pdata->getMass(), - NULL, - NULL, - m_flags[mpcd::detail::thermo_options::energy]); - - m_begin_tuner->begin(); - auto param = m_begin_tuner->getParam(); - const unsigned int block_size = param[0]; - const unsigned int tpp = param[1]; - gpu::begin_cell_thermo(args, d_cells.data, m_vel_comm->getNCells(), block_size, tpp); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_begin_tuner->end(); - } - } - -void mpcd::CellThermoComputeGPU::finishOuterCellProperties() - { - ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::readwrite); - ArrayHandle d_cell_energy(m_cell_energy, - access_location::device, - access_mode::readwrite); - ArrayHandle d_cells(m_vel_comm->getCells(), - access_location::device, - access_mode::read); - m_end_tuner->begin(); - gpu::end_cell_thermo(d_cell_vel.data, - d_cell_energy.data, - d_cells.data, - m_vel_comm->getNCells(), - m_sysdef->getNDimensions(), - m_flags[mpcd::detail::thermo_options::energy], - m_end_tuner->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_end_tuner->end(); - } -#endif // ENABLE_MPI - -void mpcd::CellThermoComputeGPU::calcInnerCellProperties() - { - ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::overwrite); - ArrayHandle d_cell_energy(m_cell_energy, - access_location::device, - access_mode::overwrite); - - ArrayHandle d_cell_np(m_cl->getCellSizeArray(), - access_location::device, - access_mode::read); - ArrayHandle d_cell_list(m_cl->getCellList(), - access_location::device, - access_mode::read); - - ArrayHandle d_vel(m_mpcd_pdata->getVelocities(), - access_location::device, - access_mode::read); - - /* - * Determine the inner cell indexer and offset. The inner indexer is the cube containing - * all non-communicating cells, and its offset is the number of communicating cells on the - * lo side of the box. 
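A standalone sketch of the bound computation described in this comment (the names here are illustrative, not HOOMD API): the low corner of the inner region is the number of communication cells on the low side of each axis, and the high corner is the local cell dimension minus the communication cells on the high side.

#include <array>
#include <cstdio>

struct InnerRange
    {
    std::array<unsigned int, 3> lo, hi, dim;
    };

InnerRange compute_inner_range(const std::array<unsigned int, 3>& local_dim,
                               const std::array<unsigned int, 3>& ncomm_lo,
                               const std::array<unsigned int, 3>& ncomm_hi)
    {
    InnerRange r {};
    for (int k = 0; k < 3; ++k)
        {
        r.lo[k] = ncomm_lo[k];
        r.hi[k] = local_dim[k] - ncomm_hi[k];
        r.dim[k] = r.hi[k] - r.lo[k]; // extent of the inner indexer
        }
    return r;
    }

int main()
    {
    // e.g. a 12 x 12 x 12 local grid with 2 communication cells on each face
    const InnerRange r = compute_inner_range({12, 12, 12}, {2, 2, 2}, {2, 2, 2});
    std::printf("inner cells: %u x %u x %u starting at (%u, %u, %u)\n",
                r.dim[0], r.dim[1], r.dim[2], r.lo[0], r.lo[1], r.lo[2]);
    return 0;
    }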
- */ - uint3 lo, hi; - const Index3D& ci = m_cl->getCellIndexer(); -#ifdef ENABLE_MPI - if (m_use_mpi) - { - auto num_comm_cells = m_cl->getNComm(); - lo = make_uint3(num_comm_cells[static_cast(mpcd::detail::face::west)], - num_comm_cells[static_cast(mpcd::detail::face::south)], - num_comm_cells[static_cast(mpcd::detail::face::down)]); - hi = make_uint3( - ci.getW() - num_comm_cells[static_cast(mpcd::detail::face::east)], - ci.getH() - num_comm_cells[static_cast(mpcd::detail::face::north)], - ci.getD() - num_comm_cells[static_cast(mpcd::detail::face::up)]); - } - else -#endif // ENABLE_MPI - { - lo = make_uint3(0, 0, 0); - hi = m_cl->getDim(); - } - Index3D inner_ci(hi.x - lo.x, hi.y - lo.y, hi.z - lo.z); - - if (m_cl->getEmbeddedGroup()) - { - // Embedded particle data - ArrayHandle d_embed_vel(m_pdata->getVelocities(), - access_location::device, - access_mode::read); - ArrayHandle d_embed_cell(m_cl->getEmbeddedGroup()->getIndexArray(), - access_location::device, - access_mode::read); - - mpcd::detail::thermo_args_t args(d_cell_vel.data, - d_cell_energy.data, - d_cell_np.data, - d_cell_list.data, - m_cl->getCellListIndexer(), - d_vel.data, - m_mpcd_pdata->getN() + m_mpcd_pdata->getNVirtual(), - m_mpcd_pdata->getMass(), - d_embed_vel.data, - d_embed_cell.data, - m_flags[mpcd::detail::thermo_options::energy]); - - m_inner_tuner->begin(); - auto param = m_inner_tuner->getParam(); - const unsigned int block_size = param[0]; - const unsigned int tpp = param[1]; - gpu::inner_cell_thermo(args, ci, inner_ci, lo, m_sysdef->getNDimensions(), block_size, tpp); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_inner_tuner->end(); - } - else - { - mpcd::detail::thermo_args_t args(d_cell_vel.data, - d_cell_energy.data, - d_cell_np.data, - d_cell_list.data, - m_cl->getCellListIndexer(), - d_vel.data, - m_mpcd_pdata->getN() + m_mpcd_pdata->getNVirtual(), - m_mpcd_pdata->getMass(), - NULL, - NULL, - m_flags[mpcd::detail::thermo_options::energy]); - - m_inner_tuner->begin(); - auto param = m_inner_tuner->getParam(); - const unsigned int block_size = param[0]; - const unsigned int tpp = param[1]; - gpu::inner_cell_thermo(args, ci, inner_ci, lo, m_sysdef->getNDimensions(), block_size, tpp); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_inner_tuner->end(); - } - } - -void mpcd::CellThermoComputeGPU::computeNetProperties() - { - // first reduce the properties on the rank - { - const Index3D& ci = m_cl->getCellIndexer(); - uint3 upper = make_uint3(ci.getW(), ci.getH(), ci.getD()); -#ifdef ENABLE_MPI - // in MPI, remove duplicate cells along direction of communication - if (m_use_mpi) - { - auto num_comm = m_cl->getNComm(); - upper.x -= num_comm[static_cast(mpcd::detail::face::east)]; - upper.y -= num_comm[static_cast(mpcd::detail::face::north)]; - upper.z -= num_comm[static_cast(mpcd::detail::face::up)]; - } -#endif // ENABLE_MPI - - // temporary cell indexer for mapping 1d kernel threads to 3d grid - const Index3D tmp_ci(upper.x, upper.y, upper.z); - m_tmp_thermo.resize(tmp_ci.getNumElements()); - - ArrayHandle d_tmp_thermo(m_tmp_thermo, - access_location::device, - access_mode::overwrite); - ArrayHandle d_cell_vel(m_cell_vel, access_location::device, access_mode::read); - ArrayHandle d_cell_energy(m_cell_energy, - access_location::device, - access_mode::read); - - m_stage_tuner->begin(); - mpcd::gpu::stage_net_cell_thermo(d_tmp_thermo.data, - d_cell_vel.data, - d_cell_energy.data, - tmp_ci, - ci, - m_flags[mpcd::detail::thermo_options::energy], - 
m_stage_tuner->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_stage_tuner->end(); - - // use cub to reduce the properties on the gpu - void* d_tmp = NULL; - size_t tmp_bytes = 0; - mpcd::gpu::reduce_net_cell_thermo(m_reduced.getDeviceFlags(), - d_tmp, - tmp_bytes, - d_tmp_thermo.data, - m_tmp_thermo.size()); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - - ScopedAllocation d_tmp_alloc(m_exec_conf->getCachedAllocator(), - (tmp_bytes > 0) ? tmp_bytes : 1); - d_tmp = (void*)d_tmp_alloc(); - - mpcd::gpu::reduce_net_cell_thermo(m_reduced.getDeviceFlags(), - d_tmp, - tmp_bytes, - d_tmp_thermo.data, - m_tmp_thermo.size()); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - } - - // now copy the net properties back to host from the flags - unsigned int n_temp_cells = 0; - { - const mpcd::detail::cell_thermo_element reduced = m_reduced.readFlags(); - - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::overwrite); - h_net_properties.data[mpcd::detail::thermo_index::momentum_x] = reduced.momentum.x; - h_net_properties.data[mpcd::detail::thermo_index::momentum_y] = reduced.momentum.y; - h_net_properties.data[mpcd::detail::thermo_index::momentum_z] = reduced.momentum.z; - - h_net_properties.data[mpcd::detail::thermo_index::energy] = reduced.energy; - h_net_properties.data[mpcd::detail::thermo_index::temperature] = reduced.temperature; - - n_temp_cells = reduced.flag; - } - -#ifdef ENABLE_MPI - if (m_use_mpi) - { - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::readwrite); - MPI_Allreduce(MPI_IN_PLACE, - h_net_properties.data, - mpcd::detail::thermo_index::num_quantities, - MPI_DOUBLE, - MPI_SUM, - m_exec_conf->getMPICommunicator()); - - MPI_Allreduce(MPI_IN_PLACE, - &n_temp_cells, - 1, - MPI_UNSIGNED, - MPI_SUM, - m_exec_conf->getMPICommunicator()); - } -#endif // ENABLE_MPI - - if (n_temp_cells > 0) - { - ArrayHandle h_net_properties(m_net_properties, - access_location::host, - access_mode::readwrite); - h_net_properties.data[mpcd::detail::thermo_index::temperature] /= (double)n_temp_cells; - } - - m_needs_net_reduce = false; - } - -namespace mpcd - { -namespace detail - { -void export_CellThermoComputeGPU(pybind11::module& m) - { - pybind11::class_>(m, "CellThermoComputeGPU") - .def(pybind11::init, std::shared_ptr>()); - } - } // namespace detail - } // namespace mpcd - } // end namespace hoomd diff --git a/hoomd/mpcd/CellThermoComputeGPU.cu b/hoomd/mpcd/CellThermoComputeGPU.cu deleted file mode 100644 index 1059096075..0000000000 --- a/hoomd/mpcd/CellThermoComputeGPU.cu +++ /dev/null @@ -1,808 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellThermoComputeGPU.cu - * \brief Explicitly instantiates reduction operators and declares kernel drivers - * for mpcd::CellThermoComputeGPU. - */ - -#include - -#include "CellThermoComputeGPU.cuh" -#include "CellThermoTypes.h" - -#include "CellCommunicator.cuh" -#include "ReductionOperators.h" - -#include "hoomd/WarpTools.cuh" - -namespace hoomd - { -namespace mpcd - { -namespace gpu - { -namespace kernel - { -//! Begins the cell thermo compute by summing cell quantities on outer cells -/*! 
- * \param d_cell_vel Velocity and mass per cell (output) - * \param d_cell_energy Energy, temperature, number of particles per cell (output) - * \param d_cells Cell indexes to compute - * \param d_cell_np Number of particles per cell - * \param d_cell_list MPCD cell list - * \param cli Indexer into the cell list - * \param d_vel MPCD particle velocities - * \param N_mpcd Number of MPCD particles - * \param mpcd_mass Mass of MPCD particle - * \param d_embed_vel Embedded particle velocity - * \param d_embed_idx Embedded particle indexes - * \param num_cells Number of cells to compute for - * - * \tparam need_energy If true, compute the cell-level energy properties - * \tparam tpp Number of threads to use per cell - * - * \b Implementation details: - * Using \a tpp threads per cell, the cell properties are accumulated into \a d_cell_vel - * and \a d_cell_energy. Shuffle-based intrinsics are used to reduce the accumulated - * properties per-cell, and the first thread for each cell writes the result into - * global memory. - */ -template -__global__ void begin_cell_thermo(double4* d_cell_vel, - double3* d_cell_energy, - const unsigned int* d_cells, - const unsigned int* d_cell_np, - const unsigned int* d_cell_list, - const Index2D cli, - const Scalar4* d_vel, - const unsigned int N_mpcd, - const Scalar mpcd_mass, - const Scalar4* d_embed_vel, - const unsigned int* d_embed_idx, - const unsigned int num_cells) - { - // tpp threads per cell - unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= tpp * num_cells) - return; - - const unsigned int cell_id = d_cells[idx / tpp]; - const unsigned int np = d_cell_np[cell_id]; - double4 momentum = make_double4(0.0, 0.0, 0.0, 0.0); - double ke(0.0); - - for (unsigned int offset = (idx % tpp); offset < np; offset += tpp) - { - // Load particle data - const unsigned int cur_p = d_cell_list[cli(offset, cell_id)]; - double3 vel_i; - double mass_i; - if (cur_p < N_mpcd) - { - Scalar4 vel_cell = d_vel[cur_p]; - vel_i = make_double3(vel_cell.x, vel_cell.y, vel_cell.z); - mass_i = mpcd_mass; - } - else - { - Scalar4 vel_m = d_embed_vel[d_embed_idx[cur_p - N_mpcd]]; - vel_i = make_double3(vel_m.x, vel_m.y, vel_m.z); - mass_i = vel_m.w; - } - - // add momentum - momentum.x += mass_i * vel_i.x; - momentum.y += mass_i * vel_i.y; - momentum.z += mass_i * vel_i.z; - momentum.w += mass_i; - - // also compute ke of the particle - if (need_energy) - ke += (double)(0.5) * mass_i - * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z); - } - - // reduce quantities down into the 0-th lane per logical warp - if (tpp > 1) - { - hoomd::detail::WarpReduce reducer; - momentum.x = reducer.Sum(momentum.x); - momentum.y = reducer.Sum(momentum.y); - momentum.z = reducer.Sum(momentum.z); - momentum.w = reducer.Sum(momentum.w); - if (need_energy) - ke = reducer.Sum(ke); - } - - // 0-th lane in each warp writes the result - if (idx % tpp == 0) - { - d_cell_vel[cell_id] = make_double4(momentum.x, momentum.y, momentum.z, momentum.w); - if (need_energy) - d_cell_energy[cell_id] = make_double3(ke, 0.0, __int_as_double(np)); - } - } - -//! Finalizes the cell thermo compute by properly averaging cell quantities -/*! - * \param d_cell_vel Cell velocity and masses - * \param d_cell_energy Cell energy and temperature - * \param d_cells Cells to compute for - * \param Ncell Number of cells - * \param n_dimensions Number of dimensions in system - * - * \tparam need_energy If true, compute the cell-level energy properties. 
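A minimal, generic CUDA sketch of the shuffle-based per-cell reduction that begin_cell_thermo (and inner_cell_thermo below) performs, written with raw __shfl_down_sync rather than the internal hoomd::detail::WarpReduce helper. TPP, the pitch-based cell-list layout, and all names are illustrative assumptions, not the deleted kernel itself.

#include <cuda_runtime.h>

// TPP cooperating threads accumulate one cell's entries, reduce within the
// TPP-wide logical warp, and lane 0 writes the per-cell result.
template<unsigned int TPP> // power of two, TPP <= 32
__global__ void cell_sum(double* d_cell_sum,
                         const double* d_values,       // cell-list ordered values
                         const unsigned int* d_cell_np,
                         const unsigned int pitch,      // assumed layout: cell * pitch + offset
                         const unsigned int num_cells)
    {
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    const bool active = (idx < TPP * num_cells);
    const unsigned int cell = active ? idx / TPP : 0;

    // strided accumulation: thread (idx % TPP) handles every TPP-th entry of its cell
    double sum = 0.0;
    if (active)
        {
        const unsigned int np = d_cell_np[cell];
        for (unsigned int offset = idx % TPP; offset < np; offset += TPP)
            sum += d_values[cell * pitch + offset];
        }

    // shuffle-based reduction within the logical warp; inactive lanes contribute 0
    for (unsigned int delta = TPP / 2; delta > 0; delta /= 2)
        sum += __shfl_down_sync(0xffffffffu, sum, delta, TPP);

    // first lane of each logical warp writes the result
    if (active && (idx % TPP) == 0)
        d_cell_sum[cell] = sum;
    }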
- * - * \b Implementation details: - * Using one thread per cell, the properties are averaged by mass, number of particles, - * etc. The temperature is computed from the cell kinetic energy. - */ -template -__global__ void end_cell_thermo(double4* d_cell_vel, - double3* d_cell_energy, - const unsigned int* d_cells, - const unsigned int Ncell, - const unsigned int n_dimensions) - { - // one thread per cell - unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= Ncell) - return; - - const unsigned int cell_id = d_cells[idx]; - - // average cell properties if the cell has mass - const double4 cell_vel = d_cell_vel[cell_id]; - double3 vel_cm = make_double3(cell_vel.x, cell_vel.y, cell_vel.z); - const double mass = cell_vel.w; - - if (mass > 0.) - { - // average velocity is only defined when there is some mass in the cell - vel_cm.x /= mass; - vel_cm.y /= mass; - vel_cm.z /= mass; - } - d_cell_vel[cell_id] = make_double4(vel_cm.x, vel_cm.y, vel_cm.z, mass); - - if (need_energy) - { - const double3 cell_energy = d_cell_energy[cell_id]; - const double ke = cell_energy.x; - double temp(0.0); - const unsigned int np = __double_as_int(cell_energy.z); - // temperature is only defined for 2 or more particles - if (np > 1) - { - const double ke_cm - = 0.5 * mass * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z); - temp = 2. * (ke - ke_cm) / (n_dimensions * (np - 1)); - } - d_cell_energy[cell_id] = make_double3(ke, temp, __int_as_double(np)); - } - } - -//! Computes the cell thermo for inner cells -/*! - * \param d_cell_vel Velocity and mass per cell (output) - * \param d_cell_energy Energy, temperature, number of particles per cell (output) - * \param ci Cell indexer - * \param inner_ci Cell indexer for the inner cells - * \param offset Offset of \a inner_ci from \a ci - * \param d_cell_np Number of particles per cell - * \param d_cell_list MPCD cell list - * \param cli Indexer into the cell list - * \param d_vel MPCD particle velocities - * \param N_mpcd Number of MPCD particles - * \param mpcd_mass Mass of MPCD particle - * \param d_embed_vel Embedded particle velocity - * \param d_embed_idx Embedded particle indexes - * \param n_dimensions System dimensionality - * - * \tparam need_energy If true, compute the cell-level energy properties. - * \tparam tpp Number of threads to use per cell - * - * \b Implementation details: - * Using \a tpp threads per cell, the cell properties are accumulated into \a d_cell_vel - * and \a d_cell_energy. Shuffle-based intrinsics are used to reduce the accumulated - * properties per-cell, and the first thread for each cell writes the result into - * global memory. The properties are properly normalized - * - * See mpcd::gpu::kernel::begin_cell_thermo for an almost identical implementation - * without the normalization at the end, which is used for the outer cells. 
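The cell temperature used by end_cell_thermo (and again by inner_cell_thermo below) can be read in isolation: with k_B = 1 in reduced units and d(N-1) internal degrees of freedom after removing the center-of-mass motion, T = 2(KE - KE_cm) / (d(N-1)), defined only for cells with at least two particles. A small host-side illustration with hypothetical names:

#include <cstddef>
#include <vector>

struct Particle
    {
    double m, vx, vy, vz;
    };

// Cell temperature from total and center-of-mass kinetic energy; returns 0 for
// cells with fewer than two particles, matching the np > 1 check in the kernel.
double cell_temperature(const std::vector<Particle>& cell, unsigned int dim)
    {
    const std::size_t n = cell.size();
    if (n < 2)
        return 0.0;

    double mass = 0., px = 0., py = 0., pz = 0., ke = 0.;
    for (const Particle& p : cell)
        {
        mass += p.m;
        px += p.m * p.vx;
        py += p.m * p.vy;
        pz += p.m * p.vz;
        ke += 0.5 * p.m * (p.vx * p.vx + p.vy * p.vy + p.vz * p.vz);
        }

    // KE of the center of mass: 0.5 * |p|^2 / M
    const double ke_cm = 0.5 * (px * px + py * py + pz * pz) / mass;
    return 2. * (ke - ke_cm) / (dim * (n - 1));
    }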
- */ -template -__global__ void inner_cell_thermo(double4* d_cell_vel, - double3* d_cell_energy, - const Index3D ci, - const Index3D inner_ci, - const uint3 offset, - const unsigned int* d_cell_np, - const unsigned int* d_cell_list, - const Index2D cli, - const Scalar4* d_vel, - const unsigned int N_mpcd, - const Scalar mpcd_mass, - const Scalar4* d_embed_vel, - const unsigned int* d_embed_idx, - const unsigned int n_dimensions) - { - // tpp threads per cell - unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= tpp * inner_ci.getNumElements()) - return; - - // reinterpret the thread id as a cell by first mapping the thread into the inner indexer, - // shifting by the offset of the inner indexer from the full indexer, and then compressing - // back into a 1D cell id - const uint3 inner_cell = inner_ci.getTriple(idx / tpp); - const uint3 cell - = make_uint3(inner_cell.x + offset.x, inner_cell.y + offset.y, inner_cell.z + offset.z); - const unsigned int cell_id = ci(cell.x, cell.y, cell.z); - - const unsigned int np = d_cell_np[cell_id]; - double4 momentum = make_double4(0.0, 0.0, 0.0, 0.0); - double ke(0.0); - - for (unsigned int offset = (idx % tpp); offset < np; offset += tpp) - { - // Load particle data - const unsigned int cur_p = d_cell_list[cli(offset, cell_id)]; - double3 vel_i; - double mass_i; - if (cur_p < N_mpcd) - { - Scalar4 vel_cell = d_vel[cur_p]; - vel_i = make_double3(vel_cell.x, vel_cell.y, vel_cell.z); - mass_i = mpcd_mass; - } - else - { - Scalar4 vel_m = d_embed_vel[d_embed_idx[cur_p - N_mpcd]]; - vel_i = make_double3(vel_m.x, vel_m.y, vel_m.z); - mass_i = vel_m.w; - } - - // add momentum - momentum.x += mass_i * vel_i.x; - momentum.y += mass_i * vel_i.y; - momentum.z += mass_i * vel_i.z; - momentum.w += mass_i; - - // also compute ke of the particle - if (need_energy) - ke += 0.5 * mass_i * (vel_i.x * vel_i.x + vel_i.y * vel_i.y + vel_i.z * vel_i.z); - } - - // reduce quantities down into the 0-th lane per logical warp - if (tpp > 1) - { - hoomd::detail::WarpReduce reducer; - momentum.x = reducer.Sum(momentum.x); - momentum.y = reducer.Sum(momentum.y); - momentum.z = reducer.Sum(momentum.z); - momentum.w = reducer.Sum(momentum.w); - if (need_energy) - ke = reducer.Sum(ke); - } - - // 0-th lane in each warp writes the result - if (idx % tpp == 0) - { - const double mass = momentum.w; - double3 vel_cm = make_double3(0.0, 0.0, 0.0); - if (mass > 0.) - { - vel_cm.x = momentum.x / mass; - vel_cm.y = momentum.y / mass; - vel_cm.z = momentum.z / mass; - } - d_cell_vel[cell_id] = make_double4(vel_cm.x, vel_cm.y, vel_cm.z, mass); - - if (need_energy) - { - double temp(0.0); - if (np > 1) - { - const double ke_cm - = 0.5 * mass - * (vel_cm.x * vel_cm.x + vel_cm.y * vel_cm.y + vel_cm.z * vel_cm.z); - temp = 2. * (ke - ke_cm) / (n_dimensions * (np - 1)); - } - d_cell_energy[cell_id] = make_double3(ke, temp, __int_as_double(np)); - } - } - } - -/*! - * \param d_tmp_thermo Temporary cell packed thermo element - * \param d_cell_vel Cell velocity to reduce - * \param d_cell_energy Cell energy to reduce - * \param tmp_ci Temporary cell indexer for cells undergoing reduction - * \param ci Cell indexer Regular cell list indexer - * - * \tparam need_energy If true, compute the cell-level energy properties. - * - * \b Implementation details: - * Using one thread per \a temporary cell, the cell properties are normalized - * in a way suitable for reduction of net properties, e.g. the cell velocities - * are converted to momentum. 
The temperature is set to the cell energy, and a - * flag is set to 1 or 0 to indicate whether this cell has an energy that should - * be used in averaging the total temperature. - */ -template -__global__ void stage_net_cell_thermo(mpcd::detail::cell_thermo_element* d_tmp_thermo, - const double4* d_cell_vel, - const double3* d_cell_energy, - const Index3D tmp_ci, - const Index3D ci) - { - // one thread per cell - unsigned int tmp_idx = blockIdx.x * blockDim.x + threadIdx.x; - if (tmp_idx >= tmp_ci.getNumElements()) - return; - - // use the temporary cell indexer to map to a cell, then use the real cell indexer to - // get the read index - uint3 cell = tmp_ci.getTriple(tmp_idx); - const unsigned int idx = ci(cell.x, cell.y, cell.z); - - const double4 vel_mass = d_cell_vel[idx]; - const double3 vel = make_double3(vel_mass.x, vel_mass.y, vel_mass.z); - const double mass = vel_mass.w; - - mpcd::detail::cell_thermo_element thermo; - thermo.momentum = make_double3(mass * vel.x, mass * vel.y, mass * vel.z); - - if (need_energy) - { - const double3 cell_energy = d_cell_energy[idx]; - thermo.energy = cell_energy.x; - if (__double_as_int(cell_energy.z) > 1) - { - thermo.temperature = cell_energy.y; - thermo.flag = 1; - } - else - { - thermo.temperature = 0.0; - thermo.flag = 0; - } - } - else - { - thermo.energy = 0.; - thermo.temperature = 0.; - thermo.flag = 0; - } - - d_tmp_thermo[tmp_idx] = thermo; - } - - } // end namespace kernel - -//! Templated launcher for multiple threads-per-cell kernel for outer cells -/* - * \param args Common arguments to thermo kernels - * \param d_cells Cell indexes to compute - * \param num_cells Number of cells to compute for - * \param block_size Number of threads per block - * \param tpp Number of threads to use per-cell - * - * \tparam cur_tpp Number of threads-per-cell for this template instantiation - * - * Launchers are recursively instantiated at compile-time in order to match the - * correct number of threads at runtime. If the templated number of threads matches - * the runtime number of threads, then the kernel is launched. Otherwise, the - * next template (with threads reduced by a factor of 2) is launched. This - * recursion is broken by a specialized template for 0 threads, which does no - * work. 
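The compile-time recursion described here reduces to a small generic pattern (a sketch with a placeholder kernel and names, not the deleted launcher): walk down powers of two until the template parameter matches the runtime threads-per-cell, launch that instantiation, and let an explicit specialization for 0 terminate the recursion.

#include <cuda_runtime.h>

template<unsigned int TPP>
__global__ void work_kernel(unsigned int* d_out, const unsigned int num_cells)
    {
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < TPP * num_cells)
        d_out[idx] = TPP; // stand-in for the real per-cell work
    }

template<unsigned int CUR_TPP>
inline void launch_work(unsigned int* d_out,
                        const unsigned int num_cells,
                        const unsigned int block_size,
                        const unsigned int tpp)
    {
    if (CUR_TPP == tpp)
        {
        const dim3 grid(CUR_TPP * num_cells / block_size + 1);
        work_kernel<CUR_TPP><<<grid, block_size>>>(d_out, num_cells);
        }
    else
        {
        // try the next smaller power of two at compile time
        launch_work<CUR_TPP / 2>(d_out, num_cells, block_size, tpp);
        }
    }

// explicit specialization breaks the recursion; it never launches anything
template<>
inline void launch_work<0>(unsigned int*, const unsigned int, const unsigned int, const unsigned int)
    {
    }

// usage: launch_work<32>(d_out, num_cells, 128, tpp); // tpp must be a power of two <= 32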
- */ -template -inline void launch_begin_cell_thermo(const mpcd::detail::thermo_args_t& args, - const unsigned int* d_cells, - const unsigned int num_cells, - const unsigned int block_size, - const unsigned int tpp) - { - if (cur_tpp == tpp) - { - if (args.need_energy) - { - unsigned int max_block_size_energy; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, - (const void*)mpcd::gpu::kernel::begin_cell_thermo); - max_block_size_energy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_energy); - dim3 grid(cur_tpp * num_cells / run_block_size + 1); - mpcd::gpu::kernel::begin_cell_thermo - <<>>(args.cell_vel, - args.cell_energy, - d_cells, - args.cell_np, - args.cell_list, - args.cli, - args.vel, - args.N_mpcd, - args.mass, - args.embed_vel, - args.embed_idx, - num_cells); - } - else - { - unsigned int max_block_size_noenergy; - cudaFuncAttributes attr; - cudaFuncGetAttributes( - &attr, - (const void*)mpcd::gpu::kernel::begin_cell_thermo); - max_block_size_noenergy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_noenergy); - dim3 grid(cur_tpp * num_cells / run_block_size + 1); - mpcd::gpu::kernel::begin_cell_thermo - <<>>(args.cell_vel, - args.cell_energy, - d_cells, - args.cell_np, - args.cell_list, - args.cli, - args.vel, - args.N_mpcd, - args.mass, - args.embed_vel, - args.embed_idx, - num_cells); - } - } - else - { - launch_begin_cell_thermo(args, d_cells, num_cells, block_size, tpp); - } - } -//! Template specialization to break recursion -template<> -inline void launch_begin_cell_thermo<0>(const mpcd::detail::thermo_args_t& args, - const unsigned int* d_cells, - const unsigned int num_cells, - const unsigned int block_size, - const unsigned int tpp) - { - } - -/* - * \param args Common arguments to thermo kernels - * \param d_cells Cell indexes to compute - * \param num_cells Number of cells to compute for - * \param block_size Number of threads per block - * \param tpp Number of threads per cell - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::launch_begin_cell_thermo - * \sa mpcd::gpu::kernel::begin_cell_thermo - */ -cudaError_t begin_cell_thermo(const mpcd::detail::thermo_args_t& args, - const unsigned int* d_cells, - const unsigned int num_cells, - const unsigned int block_size, - const unsigned int tpp) - { - if (num_cells == 0) - return cudaSuccess; - - launch_begin_cell_thermo<32>(args, d_cells, num_cells, block_size, tpp); - return cudaSuccess; - } - -/*! 
- * \param d_cell_vel Cell velocity and masses - * \param d_cell_energy Cell energy and temperature - * \param d_cells Cells to compute for - * \param Ncell Number of cells - * \param n_dimensions Number of dimensions in system - * \param need_energy If true, compute the cell-level energy properties - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::end_cell_thermo - */ -cudaError_t end_cell_thermo(double4* d_cell_vel, - double3* d_cell_energy, - const unsigned int* d_cells, - const unsigned int Ncell, - const unsigned int n_dimensions, - const bool need_energy, - const unsigned int block_size) - { - if (Ncell == 0) - return cudaSuccess; - - if (need_energy) - { - unsigned int max_block_size_energy; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::end_cell_thermo); - max_block_size_energy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_energy); - dim3 grid(Ncell / run_block_size + 1); - mpcd::gpu::kernel::end_cell_thermo - <<>>(d_cell_vel, d_cell_energy, d_cells, Ncell, n_dimensions); - } - else - { - unsigned int max_block_size_noenergy; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::end_cell_thermo); - max_block_size_noenergy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_noenergy); - dim3 grid(Ncell / run_block_size + 1); - mpcd::gpu::kernel::end_cell_thermo - <<>>(d_cell_vel, d_cell_energy, d_cells, Ncell, n_dimensions); - } - - return cudaSuccess; - } - -//! Templated launcher for multiple threads-per-cell kernel for inner cells -/* - * \param args Common arguments to thermo kernels - * \param ci Cell indexer - * \param inner_ci Cell indexer for the inner cells - * \param offset Offset of \a inner_ci from \a ci - * \param n_dimensions System dimensionality - * \param block_size Number of threads per block - * \param tpp Number of threads per cell - * - * \tparam cur_tpp Number of threads-per-cell for this template instantiation - * - * Launchers are recursively instantiated at compile-time in order to match the - * correct number of threads at runtime. If the templated number of threads matches - * the runtime number of threads, then the kernel is launched. Otherwise, the - * next template (with threads reduced by a factor of 2) is launched. This - * recursion is broken by a specialized template for 0 threads, which does no - * work. 
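Each of these drivers also clamps the requested block size to what the compiled kernel can actually support. In isolation the pattern looks like the following sketch (placeholder kernel and names):

#include <algorithm>
#include <cuda_runtime.h>

__global__ void some_kernel(float* d_out, const unsigned int n)
    {
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        d_out[i] = 0.f;
    }

// Query the kernel's attributes and never launch wider than its
// maxThreadsPerBlock, regardless of the block size the autotuner requested.
unsigned int clamp_block_size(const unsigned int requested)
    {
    cudaFuncAttributes attr;
    cudaFuncGetAttributes(&attr, (const void*)some_kernel);
    return std::min(requested, static_cast<unsigned int>(attr.maxThreadsPerBlock));
    }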
- */ -template -inline void launch_inner_cell_thermo(const mpcd::detail::thermo_args_t& args, - const Index3D& ci, - const Index3D& inner_ci, - const uint3& offset, - const unsigned int n_dimensions, - const unsigned int block_size, - const unsigned int tpp) - { - if (cur_tpp == tpp) - { - if (args.need_energy) - { - unsigned int max_block_size_energy; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, - (const void*)mpcd::gpu::kernel::inner_cell_thermo); - max_block_size_energy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_energy); - dim3 grid(cur_tpp * ci.getNumElements() / run_block_size + 1); - mpcd::gpu::kernel::inner_cell_thermo - <<>>(args.cell_vel, - args.cell_energy, - ci, - inner_ci, - offset, - args.cell_np, - args.cell_list, - args.cli, - args.vel, - args.N_mpcd, - args.mass, - args.embed_vel, - args.embed_idx, - n_dimensions); - } - else - { - unsigned int max_block_size_noenergy; - cudaFuncAttributes attr; - cudaFuncGetAttributes( - &attr, - (const void*)mpcd::gpu::kernel::inner_cell_thermo); - max_block_size_noenergy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_noenergy); - dim3 grid(cur_tpp * ci.getNumElements() / run_block_size + 1); - mpcd::gpu::kernel::inner_cell_thermo - <<>>(args.cell_vel, - args.cell_energy, - ci, - inner_ci, - offset, - args.cell_np, - args.cell_list, - args.cli, - args.vel, - args.N_mpcd, - args.mass, - args.embed_vel, - args.embed_idx, - n_dimensions); - } - } - else - { - launch_inner_cell_thermo(args, - ci, - inner_ci, - offset, - n_dimensions, - block_size, - tpp); - } - } -//! Template specialization to break recursion -template<> -inline void launch_inner_cell_thermo<0>(const mpcd::detail::thermo_args_t& args, - const Index3D& ci, - const Index3D& inner_ci, - const uint3& offset, - const unsigned int n_dimensions, - const unsigned int block_size, - const unsigned int tpp) - { - } - -/*! - * \param args Common arguments for cell thermo compute - * \param ci Cell indexer - * \param inner_ci Cell indexer for the inner cells - * \param offset Offset of \a inner_ci from \a ci - * \param n_dimensions System dimensionality - * \param block_size Number of threads per block - * \param tpp Number of threads per cell - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::launch_inner_cell_thermo - * \sa mpcd::gpu::kernel::inner_cell_thermo - */ -cudaError_t inner_cell_thermo(const mpcd::detail::thermo_args_t& args, - const Index3D& ci, - const Index3D& inner_ci, - const uint3& offset, - const unsigned int n_dimensions, - const unsigned int block_size, - const unsigned int tpp) - { - if (inner_ci.getNumElements() == 0) - return cudaSuccess; - - launch_inner_cell_thermo<32>(args, ci, inner_ci, offset, n_dimensions, block_size, tpp); - - return cudaSuccess; - } - -/*! 
- * \param d_tmp_thermo Temporary cell packed thermo element - * \param d_cell_vel Cell velocity to reduce - * \param d_cell_energy Cell energy to reduce - * \param tmp_ci Temporary cell indexer for cells undergoing reduction - * \param ci Cell indexer Regular cell list indexer - * \param need_energy If true, compute the cell-level energy properties - * \param block_size Number of threads per block - * - * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::stage_net_cell_thermo - */ -cudaError_t stage_net_cell_thermo(mpcd::detail::cell_thermo_element* d_tmp_thermo, - const double4* d_cell_vel, - const double3* d_cell_energy, - const Index3D& tmp_ci, - const Index3D& ci, - bool need_energy, - const unsigned int block_size) - { - if (need_energy) - { - unsigned int max_block_size_energy; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::stage_net_cell_thermo); - max_block_size_energy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_energy); - dim3 grid(tmp_ci.getNumElements() / run_block_size + 1); - mpcd::gpu::kernel::stage_net_cell_thermo - <<>>(d_tmp_thermo, d_cell_vel, d_cell_energy, tmp_ci, ci); - } - else - { - unsigned int max_block_size_noenergy; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::stage_net_cell_thermo); - max_block_size_noenergy = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size_noenergy); - dim3 grid(tmp_ci.getNumElements() / run_block_size + 1); - mpcd::gpu::kernel::stage_net_cell_thermo - <<>>(d_tmp_thermo, d_cell_vel, d_cell_energy, tmp_ci, ci); - } - return cudaSuccess; - } - -/*! - * \param d_reduced Cell thermo properties reduced across all cells (output on second call) - * \param d_tmp Temporary storage for reduction (output on first call) - * \param tmp_bytes Number of bytes allocated for temporary storage (output on first call) - * \param d_tmp_thermo Cell thermo properties to reduce - * \param Ncell The number of cells to reduce across - * - * \returns cudaSuccess on completion - * - * \b Implementation details: - * CUB DeviceReduce is used to perform the reduction. Hence, this function requires - * two calls to perform the reduction. The first call sizes the temporary storage, - * which is returned in \a d_tmp and \a tmp_bytes. The caller must then allocate - * the required bytes, and call the function a second time. This performs the - * reduction and returns the result in \a d_reduced. - */ -cudaError_t reduce_net_cell_thermo(mpcd::detail::cell_thermo_element* d_reduced, - void* d_tmp, - size_t& tmp_bytes, - const mpcd::detail::cell_thermo_element* d_tmp_thermo, - const size_t Ncell) - { - cub::DeviceReduce::Sum(d_tmp, tmp_bytes, d_tmp_thermo, d_reduced, (unsigned int)Ncell); - return cudaSuccess; - } - -//! Explicit template instantiation of pack for cell velocity -template cudaError_t __attribute__((visibility("default"))) -pack_cell_buffer(typename mpcd::detail::CellVelocityPackOp::element* d_send_buf, - const double4* d_props, - const unsigned int* d_send_idx, - const mpcd::detail::CellVelocityPackOp op, - const unsigned int num_send, - unsigned int block_size); - -//! 
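The two-call protocol documented above is the standard CUB sizing idiom. A self-contained sketch with an illustrative POD element type, relying only on a user-defined operator+ as the cell_thermo_element in CellThermoComputeGPU.cuh does:

#include <cuda_runtime.h>
#include <cub/cub.cuh>

struct accum_element
    {
    double3 momentum;
    double energy;

    __host__ __device__ accum_element operator+(const accum_element& other) const
        {
        accum_element sum;
        sum.momentum = make_double3(momentum.x + other.momentum.x,
                                    momentum.y + other.momentum.y,
                                    momentum.z + other.momentum.z);
        sum.energy = energy + other.energy;
        return sum;
        }
    };

void reduce_elements(accum_element* d_out, const accum_element* d_in, int n)
    {
    // first call: query the required temporary storage size
    void* d_tmp = nullptr;
    size_t tmp_bytes = 0;
    cub::DeviceReduce::Sum(d_tmp, tmp_bytes, d_in, d_out, n);

    // allocate, then the second call performs the reduction
    cudaMalloc(&d_tmp, tmp_bytes);
    cub::DeviceReduce::Sum(d_tmp, tmp_bytes, d_in, d_out, n);
    cudaFree(d_tmp);
    }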
Explicit template instantiation of pack for cell energy -template cudaError_t __attribute__((visibility("default"))) -pack_cell_buffer(typename mpcd::detail::CellEnergyPackOp::element* d_send_buf, - const double3* d_props, - const unsigned int* d_send_idx, - const mpcd::detail::CellEnergyPackOp op, - const unsigned int num_send, - unsigned int block_size); - -//! Explicit template instantiation of unpack for cell velocity -template cudaError_t __attribute__((visibility("default"))) -unpack_cell_buffer(double4* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename mpcd::detail::CellVelocityPackOp::element* d_recv_buf, - const mpcd::detail::CellVelocityPackOp op, - const unsigned int num_cells, - const unsigned int block_size); - -//! Explicit template instantiation of unpack for cell energy -template cudaError_t __attribute__((visibility("default"))) -unpack_cell_buffer(double3* d_props, - const unsigned int* d_cells, - const unsigned int* d_recv, - const unsigned int* d_recv_begin, - const unsigned int* d_recv_end, - const typename mpcd::detail::CellEnergyPackOp::element* d_recv_buf, - const mpcd::detail::CellEnergyPackOp op, - const unsigned int num_cells, - const unsigned int block_size); - - } // end namespace gpu - } // end namespace mpcd - } // end namespace hoomd diff --git a/hoomd/mpcd/CellThermoComputeGPU.cuh b/hoomd/mpcd/CellThermoComputeGPU.cuh deleted file mode 100644 index b040e8ebbb..0000000000 --- a/hoomd/mpcd/CellThermoComputeGPU.cuh +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! - * \file mpcd/CellThermoComputeGPU.cuh - * \brief Declaration of CUDA kernels for mpcd::CellThermoComputeGPU - */ - -#ifndef MPCD_CELL_THERMO_COMPUTE_GPU_CUH_ -#define MPCD_CELL_THERMO_COMPUTE_GPU_CUH_ - -#include "hoomd/BoxDim.h" -#include "hoomd/HOOMDMath.h" -#include "hoomd/Index1D.h" - -#include - -namespace hoomd - { -namespace mpcd - { -namespace detail - { -#ifdef __HIPCC__ -#define HOSTDEVICE __host__ __device__ -#else -#define HOSTDEVICE -#endif - -//! Custom cell thermo element for reductions on the gpu -struct cell_thermo_element - { - double3 momentum; //!< Momentum of the cell - double energy; //!< Energy of the cell - double temperature; //!< Temperature of the cell (0 if < 2 particles) - unsigned int flag; //!< Flag to be used to compute filled cells - - //! Addition operator for summed reduction - HOSTDEVICE cell_thermo_element operator+(const cell_thermo_element& other) const - { - cell_thermo_element sum; - sum.momentum.x = momentum.x + other.momentum.x; - sum.momentum.y = momentum.y + other.momentum.y; - sum.momentum.z = momentum.z + other.momentum.z; - sum.energy = energy + other.energy; - sum.temperature = temperature + other.temperature; - sum.flag = flag + other.flag; - - return sum; - } - }; - -//! 
Convenience struct for common parameters passed to GPU kernels -struct thermo_args_t - { - thermo_args_t(double4* cell_vel_, - double3* cell_energy_, - const unsigned int* cell_np_, - const unsigned int* cell_list_, - const Index2D& cli_, - const Scalar4* vel_, - const unsigned int N_mpcd_, - const Scalar mass_, - const Scalar4* embed_vel_, - const unsigned int* embed_idx_, - bool need_energy_) - : cell_vel(cell_vel_), cell_energy(cell_energy_), cell_np(cell_np_), cell_list(cell_list_), - cli(cli_), vel(vel_), N_mpcd(N_mpcd_), mass(mass_), embed_vel(embed_vel_), - embed_idx(embed_idx_), need_energy(need_energy_) - { - } - - double4* cell_vel; //!< Cell velocities (output) - double3* cell_energy; //!< Cell energies (output) - - const unsigned int* cell_np; //!< Number of particles per cell - const unsigned int* cell_list; //!< MPCD cell list - const Index2D cli; //!< MPCD cell list indexer - const Scalar4* vel; //!< MPCD particle velocities - const unsigned int N_mpcd; //!< Number of MPCD particles - const Scalar mass; //!< MPCD particle mass - const Scalar4* embed_vel; //!< Embedded particle velocities - const unsigned int* embed_idx; //!< Embedded particle indexes - const bool need_energy; //!< Flag if energy calculations are required - }; -#undef HOSTDEVICE - } // namespace detail - -namespace gpu - { -//! Kernel driver to begin cell thermo compute of outer cells -cudaError_t begin_cell_thermo(const mpcd::detail::thermo_args_t& args, - const unsigned int* d_cells, - const unsigned int num_cells, - const unsigned int block_size, - const unsigned int tpp); - -//! Kernel driver to finalize cell thermo compute of outer cells -cudaError_t end_cell_thermo(double4* d_cell_vel, - double3* d_cell_energy, - const unsigned int* d_cells, - const unsigned int Ncell, - const unsigned int n_dimensions, - const bool need_energy, - const unsigned int block_size); - -//! Kernel driver to perform cell thermo compute for inner cells -cudaError_t inner_cell_thermo(const mpcd::detail::thermo_args_t& args, - const Index3D& ci, - const Index3D& inner_ci, - const uint3& offset, - const unsigned int n_dimensions, - const unsigned int block_size, - const unsigned int tpp); - -//! Kernel driver to stage cell properties for net thermo reduction -cudaError_t stage_net_cell_thermo(mpcd::detail::cell_thermo_element* d_tmp_thermo, - const double4* d_cell_vel, - const double3* d_cell_energy, - const Index3D& tmp_ci, - const Index3D& ci, - const bool need_energy, - const unsigned int block_size); - -//! Wrapper to cub device reduce for cell thermo properties -cudaError_t reduce_net_cell_thermo(mpcd::detail::cell_thermo_element* d_reduced, - void* d_tmp, - size_t& tmp_bytes, - const mpcd::detail::cell_thermo_element* d_tmp_thermo, - const size_t Ncell); - - } // end namespace gpu - } // end namespace mpcd - } // end namespace hoomd -#endif // MPCD_CELL_THERMO_COMPUTE_GPU_CUH_ diff --git a/hoomd/mpcd/CellThermoComputeGPU.h b/hoomd/mpcd/CellThermoComputeGPU.h deleted file mode 100644 index f5c0d652c1..0000000000 --- a/hoomd/mpcd/CellThermoComputeGPU.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2009-2025 The Regents of the University of Michigan. -// Part of HOOMD-blue, released under the BSD 3-Clause License. - -/*! 
- * \file mpcd/CellThermoComputeGPU.h - * \brief Declaration of mpcd::CellThermoComputeGPU - */ - -#ifndef MPCD_CELL_THERMO_COMPUTE_GPU_H_ -#define MPCD_CELL_THERMO_COMPUTE_GPU_H_ - -#ifdef __HIPCC__ -#error This header cannot be compiled by nvcc -#endif - -#include "CellThermoCompute.h" -#include "CellThermoComputeGPU.cuh" - -#include "hoomd/Autotuner.h" - -// pybind11 -#include - -namespace hoomd - { -namespace mpcd - { -//! Computes the cell (thermodynamic) properties on the GPU -class PYBIND11_EXPORT CellThermoComputeGPU : public mpcd::CellThermoCompute - { - public: - //! Constructor - CellThermoComputeGPU(std::shared_ptr sysdef, - std::shared_ptr cl); - - //! Destructor - virtual ~CellThermoComputeGPU(); - - protected: -#ifdef ENABLE_MPI - //! Begin the calculation of outer cell properties on the GPU - void beginOuterCellProperties() override; - - //! Finish the calculation of outer cell properties on the GPU - void finishOuterCellProperties() override; -#endif // ENABLE_MPI - - //! Calculate the inner cell properties on the GPU - void calcInnerCellProperties() override; - - //! Compute the net properties from the cell properties - void computeNetProperties() override; - - private: - std::shared_ptr> m_begin_tuner; //!< Tuner for cell begin kernel - std::shared_ptr> m_end_tuner; //!< Tuner for cell end kernel - std::shared_ptr> m_inner_tuner; //!< Tuner for inner cell compute kernel - std::shared_ptr> m_stage_tuner; //!< Tuner for staging net property compute - - GPUVector - m_tmp_thermo; //!< Temporary array for holding cell data - GPUFlags m_reduced; //!< Flags to hold reduced sum - }; - } // end namespace mpcd - } // end namespace hoomd - -#endif // MPCD_CELL_THERMO_COMPUTE_GPU_H_ diff --git a/hoomd/mpcd/CellThermoTypes.h b/hoomd/mpcd/CellThermoTypes.h index 7bc05e590d..a900b0c57d 100644 --- a/hoomd/mpcd/CellThermoTypes.h +++ b/hoomd/mpcd/CellThermoTypes.h @@ -3,7 +3,7 @@ /*! 
* \file mpcd/CellThermoTypes.h - * \brief Defines enums for mpcd::CellThermoCompute + * \brief Defines enums for mpcd::CellList */ #ifndef MPCD_CELL_THERMO_TYPES_H_ diff --git a/hoomd/mpcd/CollisionMethod.cc b/hoomd/mpcd/CollisionMethod.cc index b7d59b66f7..503ca9bfb5 100644 --- a/hoomd/mpcd/CollisionMethod.cc +++ b/hoomd/mpcd/CollisionMethod.cc @@ -99,12 +99,6 @@ void mpcd::CollisionMethod::collide(uint64_t timestep) } #endif // ENABLE_HIP - // set random grid shift - m_cl->drawGridShift(timestep); - - // update cell list - m_cl->compute(timestep); - // setup rigid bodies before collision happens const bool rigid_body_collision = m_embed_group && m_rigid_bodies; if (rigid_body_collision) @@ -143,6 +137,8 @@ void mpcd::CollisionMethod::collide(uint64_t timestep) } } + // update cell list + m_cl->compute(timestep); // apply collisions rule(timestep); diff --git a/hoomd/mpcd/Integrator.cc b/hoomd/mpcd/Integrator.cc index 7ed9e842f8..a463e67ec4 100644 --- a/hoomd/mpcd/Integrator.cc +++ b/hoomd/mpcd/Integrator.cc @@ -87,13 +87,14 @@ void mpcd::Integrator::update(uint64_t timestep) } } + // perform the core MPCD steps of collision and streaming + if (m_collide) + m_collide->collide(timestep); + // optionally sort for performance if (m_sorter && (*m_sorter->getTrigger())(timestep)) m_sorter->update(timestep); - // perform the core MPCD steps of collision and streaming - if (m_collide) - m_collide->collide(timestep); if (m_stream) m_stream->stream(timestep); diff --git a/hoomd/mpcd/ParticleData.h b/hoomd/mpcd/ParticleData.h index f1a0c3298b..6453098dfa 100644 --- a/hoomd/mpcd/ParticleData.h +++ b/hoomd/mpcd/ParticleData.h @@ -57,15 +57,6 @@ namespace mpcd * are based on around the velocity and cell. For details of what the cell means, * refer to the mpcd::CellList. * - * \todo Because the local cell index changes with position, a signal will be put - * in place to indicate when the cached cell index is still valid. - * - * \todo Likewise, MPCD benefits from sorting data into cell order, so a signal - * needs to be put in place when the ordering changes. - * - * \todo Likewise, a signal should be incorporated to indicate when particles are - * added or removed locally, as is the case during particle migration. - * * \ingroup data_structs */ class PYBIND11_EXPORT ParticleData : public Autotuned @@ -272,9 +263,7 @@ class PYBIND11_EXPORT ParticleData : public Autotuned } //! Signature for particle sort signal - typedef Nano::Signal< - void(uint64_t timestep, const GPUArray&, const GPUArray&)> - SortSignal; + typedef Nano::Signal SortSignal; //! Get the sort signal /*! @@ -286,23 +275,6 @@ class PYBIND11_EXPORT ParticleData : public Autotuned return m_sort_signal; } - //! Notify subscribers of a particle sort - /*! - * \param timestep Timestep that the sorting occurred - * \param order Mapping of sorted particle indexes onto old particle indexes - * \param rorder Mapping of old particle indexes onto sorted particle indexes - * - * This method notifies the subscribers of the sort occurring at \a timestep. - * Subscribers may choose to use \a order and \a rorder to reorder their - * per-particle data immediately, or delay the sort until their next call. - */ - void notifySort(uint64_t timestep, - const GPUArray& order, - const GPUArray& rorder) - { - m_sort_signal.emit(timestep, order, rorder); - } - //! Notify subscribers of a particle sort /*! 
* \param timestep Timestep that the sorting occurred @@ -312,8 +284,7 @@ class PYBIND11_EXPORT ParticleData : public Autotuned */ void notifySort(uint64_t timestep) { - GPUArray order, rorder; - m_sort_signal.emit(timestep, order, rorder); + m_sort_signal.emit(timestep); } //@} diff --git a/hoomd/mpcd/SRDCollisionMethod.cc b/hoomd/mpcd/SRDCollisionMethod.cc index c69b8ed16e..f9c92eec43 100644 --- a/hoomd/mpcd/SRDCollisionMethod.cc +++ b/hoomd/mpcd/SRDCollisionMethod.cc @@ -32,22 +32,20 @@ mpcd::SRDCollisionMethod::~SRDCollisionMethod() void mpcd::SRDCollisionMethod::startAutotuning() { mpcd::CollisionMethod::startAutotuning(); - if (m_thermo) - m_thermo->startAutotuning(); + if (m_cl) + m_cl->startAutotuning(); } bool mpcd::SRDCollisionMethod::isAutotuningComplete() { bool result = mpcd::CollisionMethod::isAutotuningComplete(); - if (m_thermo) - result = result && m_thermo->isAutotuningComplete(); + if (m_cl) + result = result && m_cl->isAutotuningComplete(); return result; } void mpcd::SRDCollisionMethod::rule(uint64_t timestep) { - m_thermo->compute(timestep); - // resize the rotation vectors and rescale factors m_rotvec.resize(m_cl->getNCells()); if (m_T) @@ -71,16 +69,20 @@ void mpcd::SRDCollisionMethod::drawRotationVectors(uint64_t timestep) // optional scale factors std::unique_ptr> h_factors; - std::unique_ptr> h_cell_energy; + std::unique_ptr> h_cell_np; + std::unique_ptr> h_cell_temp; Scalar T_set(1.0); const bool use_thermostat = (m_T) ? true : false; if (use_thermostat) { h_factors.reset( new ArrayHandle(m_factors, access_location::host, access_mode::overwrite)); - h_cell_energy.reset(new ArrayHandle(m_thermo->getCellEnergies(), - access_location::host, - access_mode::read)); + h_cell_np.reset(new ArrayHandle(m_cl->getCellSizeArray(), + access_location::host, + access_mode::read)); + h_cell_temp.reset(new ArrayHandle(m_cl->getCellTemperature(), + access_location::host, + access_mode::read)); T_set = (*m_T)(timestep); } @@ -110,8 +112,8 @@ void mpcd::SRDCollisionMethod::drawRotationVectors(uint64_t timestep) if (use_thermostat) { - const double3 cell_energy = h_cell_energy->data[idx]; - const unsigned int np = __double_as_int(cell_energy.z); + const double cell_temp = h_cell_temp->data[idx]; + const unsigned int np = h_cell_np->data[idx]; double factor = 1.0; if (np > 1) { @@ -125,7 +127,7 @@ void mpcd::SRDCollisionMethod::drawRotationVectors(uint64_t timestep) // generate the scale factor from the current temperature // (don't use the kinetic energy of this cell, since this // is total not relative to COM) - const double cur_ke = alpha * cell_energy.y; + const double cur_ke = alpha * cell_temp; factor = (cur_ke > 0.) ? 
fast::sqrt(rand_ke / cur_ke) : 1.; } h_factors->data[idx] = factor; @@ -162,7 +164,7 @@ void mpcd::SRDCollisionMethod::rotate(uint64_t timestep) } // acquire cell velocities - ArrayHandle h_cell_vel(m_thermo->getCellVelocities(), + ArrayHandle h_cell_vel(m_cl->getCellVelocities(), access_location::host, access_mode::read); @@ -264,29 +266,24 @@ void mpcd::SRDCollisionMethod::setCellList(std::shared_ptr cl) detachCallbacks(); if (m_cl) { - m_thermo = std::make_shared(m_sysdef, m_cl); attachCallbacks(); } - else - { - m_thermo = std::shared_ptr(); - } } } void mpcd::SRDCollisionMethod::attachCallbacks() { - assert(m_thermo); - m_thermo->getFlagsSignal() + assert(m_cl); + m_cl->getFlagsSignal() .connect( this); } void mpcd::SRDCollisionMethod::detachCallbacks() { - if (m_thermo) + if (m_cl) { - m_thermo->getFlagsSignal() + m_cl->getFlagsSignal() .disconnect(this); } diff --git a/hoomd/mpcd/SRDCollisionMethod.h b/hoomd/mpcd/SRDCollisionMethod.h index d4732269dd..2d7c09b84e 100644 --- a/hoomd/mpcd/SRDCollisionMethod.h +++ b/hoomd/mpcd/SRDCollisionMethod.h @@ -13,7 +13,6 @@ #error This header cannot be compiled by nvcc #endif -#include "CellThermoCompute.h" #include "CollisionMethod.h" #include "hoomd/Variant.h" @@ -81,9 +80,8 @@ class PYBIND11_EXPORT SRDCollisionMethod : public mpcd::CollisionMethod } protected: - std::shared_ptr m_thermo; //!< Cell thermo - GPUVector m_rotvec; //!< MPCD rotation vectors - Scalar m_angle; //!< MPCD rotation angle (degrees) + GPUVector m_rotvec; //!< MPCD rotation vectors + Scalar m_angle; //!< MPCD rotation angle (degrees) GPUVector m_factors; //!< Cell-level rescale factors diff --git a/hoomd/mpcd/SRDCollisionMethodGPU.cc b/hoomd/mpcd/SRDCollisionMethodGPU.cc index bb13dd00bb..b124879487 100644 --- a/hoomd/mpcd/SRDCollisionMethodGPU.cc +++ b/hoomd/mpcd/SRDCollisionMethodGPU.cc @@ -7,7 +7,6 @@ */ #include "SRDCollisionMethodGPU.h" -#include "CellThermoComputeGPU.h" #include "SRDCollisionMethodGPU.cuh" namespace hoomd @@ -37,14 +36,18 @@ void mpcd::SRDCollisionMethodGPU::drawRotationVectors(uint64_t timestep) if (m_T) { ArrayHandle d_factors(m_factors, access_location::device, access_mode::overwrite); - ArrayHandle d_cell_energy(m_thermo->getCellEnergies(), - access_location::device, - access_mode::read); + ArrayHandle d_cell_np(m_cl->getCellSizeArray(), + access_location::device, + access_mode::read); + ArrayHandle d_cell_temp(m_cl->getCellTemperature(), + access_location::device, + access_mode::read); m_tuner_rotvec->begin(); mpcd::gpu::srd_draw_vectors(d_rotvec.data, d_factors.data, - d_cell_energy.data, + d_cell_np.data, + d_cell_temp.data, m_cl->getCellIndexer(), m_cl->getOriginIndex(), m_cl->getGlobalDim(), @@ -62,6 +65,7 @@ void mpcd::SRDCollisionMethodGPU::drawRotationVectors(uint64_t timestep) { m_tuner_rotvec->begin(); mpcd::gpu::srd_draw_vectors(d_rotvec.data, + NULL, NULL, NULL, m_cl->getCellIndexer(), @@ -89,7 +93,7 @@ void mpcd::SRDCollisionMethodGPU::rotate(uint64_t timestep) unsigned int N_tot = N_mpcd; // acquire cell velocities and rotation vectors - ArrayHandle d_cell_vel(m_thermo->getCellVelocities(), + ArrayHandle d_cell_vel(m_cl->getCellVelocities(), access_location::device, access_mode::read); ArrayHandle d_rotvec(m_rotvec, access_location::device, access_mode::read); @@ -162,13 +166,8 @@ void mpcd::SRDCollisionMethodGPU::setCellList(std::shared_ptr cl detachCallbacks(); if (m_cl) { - m_thermo = std::make_shared(m_sysdef, m_cl); attachCallbacks(); } - else - { - m_thermo = std::shared_ptr(); - } } } diff --git 
a/hoomd/mpcd/SRDCollisionMethodGPU.cu b/hoomd/mpcd/SRDCollisionMethodGPU.cu index d33a7a56c5..c8185114f4 100644 --- a/hoomd/mpcd/SRDCollisionMethodGPU.cu +++ b/hoomd/mpcd/SRDCollisionMethodGPU.cu @@ -21,7 +21,8 @@ namespace kernel template __global__ void srd_draw_vectors(double3* d_rotvec, double* d_factors, - const double3* d_cell_energy, + const unsigned int* d_cell_np, + const double* d_cell_temp, const Index3D ci, const int3 origin, const uint3 global_dim, @@ -73,8 +74,8 @@ __global__ void srd_draw_vectors(double3* d_rotvec, if (use_thermostat) { - const double3 cell_energy = d_cell_energy[idx]; - const unsigned int np = __double_as_int(cell_energy.z); + const double cell_temp = d_cell_temp[idx]; + const unsigned int np = d_cell_np[idx]; double factor = 1.0; if (np > 1) { @@ -88,7 +89,7 @@ __global__ void srd_draw_vectors(double3* d_rotvec, // generate the scale factor from the current temperature // (don't use the kinetic energy of this cell, since this // is total not relative to COM) - const double cur_ke = alpha * cell_energy.y; + const double cur_ke = alpha * cell_temp; factor = (cur_ke > 0.) ? fast::sqrt(rand_ke / cur_ke) : 1.; } d_factors[idx] = factor; @@ -184,7 +185,8 @@ __global__ void srd_rotate(Scalar4* d_vel, cudaError_t srd_draw_vectors(double3* d_rotvec, double* d_factors, - const double3* d_cell_energy, + const unsigned int* d_cell_np, + const double* d_cell_temp, const Index3D& ci, const int3 origin, const uint3 global_dim, @@ -208,7 +210,8 @@ cudaError_t srd_draw_vectors(double3* d_rotvec, dim3 grid(Ncell / run_block_size + 1); mpcd::gpu::kernel::srd_draw_vectors<<>>(d_rotvec, d_factors, - d_cell_energy, + d_cell_np, + d_cell_temp, ci, origin, global_dim, @@ -232,7 +235,8 @@ cudaError_t srd_draw_vectors(double3* d_rotvec, dim3 grid(Ncell / run_block_size + 1); mpcd::gpu::kernel::srd_draw_vectors<<>>(d_rotvec, d_factors, - d_cell_energy, + d_cell_np, + d_cell_temp, ci, origin, global_dim, diff --git a/hoomd/mpcd/SRDCollisionMethodGPU.cuh b/hoomd/mpcd/SRDCollisionMethodGPU.cuh index 41d451ebca..eb22607340 100644 --- a/hoomd/mpcd/SRDCollisionMethodGPU.cuh +++ b/hoomd/mpcd/SRDCollisionMethodGPU.cuh @@ -22,7 +22,8 @@ namespace gpu { cudaError_t srd_draw_vectors(double3* d_rotvec, double* d_factors, - const double3* d_cell_energy, + const unsigned int* d_cell_np, + const double* d_cell_temp, const Index3D& ci, const int3 origin, const uint3 global_dim, diff --git a/hoomd/mpcd/Sorter.cc b/hoomd/mpcd/Sorter.cc index cc178c372a..3c26cf5a04 100644 --- a/hoomd/mpcd/Sorter.cc +++ b/hoomd/mpcd/Sorter.cc @@ -14,8 +14,7 @@ namespace hoomd * \param sysdef System definition */ mpcd::Sorter::Sorter(std::shared_ptr sysdef, std::shared_ptr trigger) - : Tuner(sysdef, trigger), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_order(m_exec_conf), - m_rorder(m_exec_conf) + : Tuner(sysdef, trigger), m_mpcd_pdata(m_sysdef->getMPCDParticleData()), m_order(m_exec_conf) { m_exec_conf->msg->notice(5) << "Constructing MPCD Sorter" << std::endl; } @@ -36,17 +35,18 @@ void mpcd::Sorter::update(uint64_t timestep) { throw std::runtime_error("Cell list has not been set"); } + // ensure that the cell list is built + m_cl->compute(timestep); // resize the sorted order vector to the current number of particles m_order.resize(m_mpcd_pdata->getN()); - m_rorder.resize(m_mpcd_pdata->getN()); // generate and apply the sorted order computeOrder(timestep); applyOrder(); // trigger the sort signal for ParticleData callbacks using the current sortings - m_mpcd_pdata->notifySort(timestep, m_order, m_rorder); + 
m_mpcd_pdata->notifySort(timestep); } /*! @@ -58,37 +58,18 @@ void mpcd::Sorter::update(uint64_t timestep) */ void mpcd::Sorter::computeOrder(uint64_t timestep) { - // compute the cell list at current timestep, guarantees owned particles are on rank - m_cl->compute(timestep); - - ArrayHandle h_cell_list(m_cl->getCellList(), - access_location::host, - access_mode::read); - ArrayHandle h_cell_np(m_cl->getCellSizeArray(), - access_location::host, - access_mode::read); - const Index2D& cli = m_cl->getCellListIndexer(); - - // loop through the cell list to generate the sorting order for MPCD particles ArrayHandle h_order(m_order, access_location::host, access_mode::overwrite); - ArrayHandle h_rorder(m_rorder, access_location::host, access_mode::overwrite); - const unsigned int N_mpcd = m_mpcd_pdata->getN(); - unsigned int cur_p = 0; - for (unsigned int idx = 0; idx < m_cl->getNCells(); ++idx) - { - const unsigned int np = h_cell_np.data[idx]; - for (unsigned int offset = 0; offset < np; ++offset) - { - const unsigned int pid = h_cell_list.data[cli(offset, idx)]; - // only count MPCD particles, and skip embedded particles - if (pid < N_mpcd) - { - h_order.data[cur_p] = pid; - h_rorder.data[pid] = cur_p; - ++cur_p; - } - } - } + ArrayHandle h_vel(m_mpcd_pdata->getVelocities(), + access_location::host, + access_mode::read); + unsigned int mpcd_N = m_mpcd_pdata->getN(); + // sort indices + std::iota(h_order.data, h_order.data + mpcd_N, 0); + + std::sort(h_order.data, + h_order.data + mpcd_N, + [&h_vel](unsigned int i, unsigned int j) + { return __scalar_as_int(h_vel.data[i].w) < __scalar_as_int(h_vel.data[j].w); }); } /*! diff --git a/hoomd/mpcd/Sorter.h b/hoomd/mpcd/Sorter.h index 3a308cca0f..427f5c61c6 100644 --- a/hoomd/mpcd/Sorter.h +++ b/hoomd/mpcd/Sorter.h @@ -64,8 +64,7 @@ class PYBIND11_EXPORT Sorter : public Tuner std::shared_ptr m_mpcd_pdata; //!< MPCD particle data std::shared_ptr m_cl; //!< MPCD cell list - GPUVector m_order; //!< Maps new sorted index onto old particle indexes - GPUVector m_rorder; //!< Maps old particle indexes onto new sorted indexes + GPUVector m_order; //!< Maps new sorted index onto old particle indexes //! Compute the sorting order at the current timestep virtual void computeOrder(uint64_t timestep); diff --git a/hoomd/mpcd/SorterGPU.cc b/hoomd/mpcd/SorterGPU.cc index 897b81919b..cc88796b32 100644 --- a/hoomd/mpcd/SorterGPU.cc +++ b/hoomd/mpcd/SorterGPU.cc @@ -16,18 +16,15 @@ namespace hoomd */ mpcd::SorterGPU::SorterGPU(std::shared_ptr sysdef, std::shared_ptr trigger) - : mpcd::Sorter(sysdef, trigger) + : mpcd::Sorter(sysdef, trigger), m_cell_id(m_exec_conf) { - m_sentinel_tuner.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_sort_sentinel")); - m_reverse_tuner.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, - m_exec_conf, - "mpcd_sort_reverse")); + m_compute_order_tuner.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, + m_exec_conf, + "mpcd_sort_sentinel")); m_apply_tuner.reset(new Autotuner<1>({AutotunerBase::makeBlockSizeRange(m_exec_conf)}, m_exec_conf, "mpcd_sort_apply")); - m_autotuners.insert(m_autotuners.end(), {m_sentinel_tuner, m_reverse_tuner, m_apply_tuner}); + m_autotuners.insert(m_autotuners.end(), {m_compute_order_tuner, m_apply_tuner}); } /*! 
@@ -39,64 +36,38 @@ mpcd::SorterGPU::SorterGPU(std::shared_ptr sysdef, */ void mpcd::SorterGPU::computeOrder(uint64_t timestep) { - // compute the cell list at current timestep, guarantees owned particles are on rank - m_cl->compute(timestep); - - // fill the empty cell list entries with a sentinel larger than number of MPCD particles + // ensure auxiliary array is correct size + const unsigned int mpcd_N = m_mpcd_pdata->getN(); + if (mpcd_N > m_cell_id.getNumElements()) { - ArrayHandle d_cell_list(m_cl->getCellList(), - access_location::device, - access_mode::readwrite); - ArrayHandle d_cell_np(m_cl->getCellSizeArray(), - access_location::device, - access_mode::read); - - m_sentinel_tuner->begin(); - mpcd::gpu::sort_set_sentinel(d_cell_list.data, - d_cell_np.data, - m_cl->getCellListIndexer(), - 0xffffffff, - m_sentinel_tuner->getParam()[0]); - if (m_exec_conf->isCUDAErrorCheckingEnabled()) - CHECK_CUDA_ERROR(); - m_sentinel_tuner->end(); + GPUArray cell_id(mpcd_N, m_exec_conf); + m_cell_id.swap(cell_id); } - // use thrust to select out the indexes of MPCD particles + // compute the cell list at current timestep, guarantees owned particles are on rank + // create an order to sort the particle indices by their cell { - ArrayHandle d_cell_list(m_cl->getCellList(), - access_location::device, - access_mode::read); ArrayHandle d_order(m_order, access_location::device, access_mode::overwrite); - const unsigned int num_select - = mpcd::gpu::sort_cell_compact(d_order.data, - d_cell_list.data, - m_cl->getCellListIndexer().getNumElements(), - m_mpcd_pdata->getN()); + ArrayHandle d_cell_id(m_cell_id, + access_location::device, + access_mode::readwrite); + ArrayHandle d_vel(m_mpcd_pdata->getVelocities(), + access_location::device, + access_mode::read); + m_compute_order_tuner->begin(); + mpcd::gpu::set_order(d_order.data, + d_cell_id.data, + d_vel.data, + mpcd_N, + m_compute_order_tuner->getParam()[0]); if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR(); - if (num_select != m_mpcd_pdata->getN()) - { - m_exec_conf->msg->error() - << "Error compacting cell list for sorting, lost particles." << std::endl; - } - } - - // fill out the reverse ordering map - { - ArrayHandle d_order(m_order, access_location::device, access_mode::read); - ArrayHandle d_rorder(m_rorder, - access_location::device, - access_mode::overwrite); + m_compute_order_tuner->end(); - m_reverse_tuner->begin(); - mpcd::gpu::sort_gen_reverse(d_rorder.data, - d_order.data, - m_mpcd_pdata->getN(), - m_reverse_tuner->getParam()[0]); + // perform sorting + mpcd::gpu::compute_order(d_order.data, d_cell_id.data, mpcd_N); if (m_exec_conf->isCUDAErrorCheckingEnabled()) CHECK_CUDA_ERROR(); - m_reverse_tuner->end(); } } diff --git a/hoomd/mpcd/SorterGPU.cu b/hoomd/mpcd/SorterGPU.cu index 2367372fd4..5212fc7296 100644 --- a/hoomd/mpcd/SorterGPU.cu +++ b/hoomd/mpcd/SorterGPU.cu @@ -11,7 +11,9 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #include +#include #include +#include #pragma GCC diagnostic pop namespace hoomd @@ -59,65 +61,27 @@ __global__ void sort_apply(Scalar4* d_pos_alt, //! Kernel to set the empty-cell-entry sentinel /*! 
- * \param d_cell_list Cell list to fill in - * \param d_cell_np Number of particles per cell - * \param cli Two-dimensional cell-list indexer - * \param sentinel Value to fill into empty cell-list entries - * \param N_cli Number of total entries (filled and empty) in the cell list - * - * \b Implementation - * Using one thread per cell-list entry, the 1D kernel index is mapped onto - * the 2D entry in the cell list. If the current entry is not filled, the entry - * is filled with the value of \a sentinel. Typically, \a sentinel should be - * larger than the number of MPCD particles in the system. (A good value would be - * 0xffffffff). The cell list can subsequently be compacted by mpcd::gpu::sort_cell_compact. - */ -__global__ void sort_set_sentinel(unsigned int* d_cell_list, - const unsigned int* d_cell_np, - const Index2D cli, - const unsigned int sentinel, - const unsigned int N_cli) - { - // one thread per cell-list entry - const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= N_cli) - return; - - // convert the entry 1D index into a 2D index - const unsigned int cell = idx / cli.getW(); - const unsigned int offset = idx - (cell * cli.getW()); - - // if the offset lies outside the number of particles in the cell, fill it with sentinel - const unsigned int np = d_cell_np[cell]; - if (offset >= np) - d_cell_list[idx] = sentinel; - } - -//! Kernel to generate the reverse particle mapping -/*! - * \param d_rorder Map of old particle indexes onto new particle indexes (output) - * \param d_order Map of new particle indexes onto old particle indexes + * \param d_order Particle indexes to fill in ascending order (output) + * \param d_cell_id Auxiliary array filled with the cell index of each MPCD particle (output) + * \param d_vel Velocities of the MPCD particles, with the cell index stored in the w component * \param N Number of particles * * \b Implementation - * Using one thread per particle, the map generated by mpcd::gpu::sort_cell_compact - * is read and reversed so that new particle indexes can be looked up from old - * particle indexes. + * Using one thread per particle, d_order is filled with the indexes 0 to N-1, and the + * auxiliary array d_cell_id is filled with the cell index of each MPCD particle, in + * preparation for sorting the values in d_order by cell index. */ __global__ void -sort_gen_reverse(unsigned int* d_rorder, const unsigned int* d_order, const unsigned int N) +set_order(unsigned int* d_order, unsigned int* d_cell_id, const Scalar4* d_vel, unsigned int N) { // one thread per particle const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= N) return; - // inverse map the ordering - // d_order maps the new index (idx) onto the old index (pid), so we need to flip this around - const unsigned int pid = d_order[idx]; - d_rorder[pid] = idx; + d_order[idx] = idx; + d_cell_id[idx] = __scalar_as_int(d_vel[idx].w); } - } // end namespace kernel /*! @@ -168,123 +132,45 @@ cudaError_t sort_apply(Scalar4* d_pos_alt, } /*!
- * \param d_cell_list Cell list to fill in - * \param d_cell_np Number of particles per cell - * \param cli Two-dimensional cell-list indexer - * \param sentinel Value to fill into empty cell-list entries - * \param N_cli Number of total entries (filled and empty) in the cell list + * \param d_order Particle indexes to fill in ascending order (output) + * \param d_cell_id Auxiliary array filled with the cell index of each MPCD particle (output) + * \param d_vel Velocities of the MPCD particles, with the cell index stored in the w component + * \param N Number of particles * \param block_size Number of threads per block - + * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::sort_set_sentinel + * \sa mpcd::gpu::kernel::set_order */ -cudaError_t sort_set_sentinel(unsigned int* d_cell_list, - const unsigned int* d_cell_np, - const Index2D& cli, - const unsigned int sentinel, - const unsigned int block_size) +cudaError_t set_order(unsigned int* d_order, + unsigned int* d_cell_id, + const Scalar4* d_vel, + const unsigned int N, + const unsigned int block_size) { unsigned int max_block_size; cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::sort_set_sentinel); + cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::set_order); max_block_size = attr.maxThreadsPerBlock; - const unsigned int N_cli = cli.getNumElements(); - unsigned int run_block_size = min(block_size, max_block_size); - dim3 grid(N_cli / run_block_size + 1); - mpcd::gpu::kernel::sort_set_sentinel<<>>(d_cell_list, - d_cell_np, - cli, - sentinel, - N_cli); - + dim3 grid(N / run_block_size + 1); + mpcd::gpu::kernel::set_order<<>>(d_order, d_cell_id, d_vel, N); return cudaSuccess; } -//! Less-than comparison functor for cell-list compaction -struct LessThan - { - //! Constructor - /*! - * \param compare_ Value to compare less than - */ - __host__ __device__ LessThan(unsigned int compare_) : compare(compare_) { } - - //! Less than comparison functor - /*! - * \param val - * \returns True if \a val is less than \a compare - */ - __host__ __device__ bool operator()(const unsigned int& val) const - { - return (val < compare); - } - - unsigned int compare; //!< Value to compare less-than to - }; - /*! - * \param d_order Compacted MPCD particle indexes in cell-list order (output) + * \param d_order MPCD particle indexes, sorted into cell order on output - * \param d_cell_list Cell list array to compact - * \param num_items Number of items in the cell list - * \param N_mpcd Number of MPCD particles - * - * \returns Number of items selected (should be equal to N_mpcd). - * - * \b Implementation - * thrust::copy_if is used to efficiently compact the MPCD cell list to - * a stream containing only MPCD particle indexes. First, a sentinel is set to - * fill in empty entries (mpcd::gpu::sort_set_sentinel). Then, the LessThan functor - * is used to compare all entries in the cell list, and return only those with - * indexes less than \a N_mpcd. - * - * \b Note - * A previous version of this function used CUB to perform the select, but this seemed to - * fail when HOOMD bumped to CUB 1.8.0. Segfaults were encountered when executing DeviceSelect::If. - * I decided to replace it with thrust since this is not so performance critical, and I was unable - * to find the proper root cause of it.
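The set_order driver above follows the usual launch-configuration recipe in this file: clamp the requested block size to the kernel's maxThreadsPerBlock and launch N / run_block_size + 1 blocks, relying on the idx >= N guard inside the kernel to retire the excess threads. The arithmetic in isolation, as a plain C++ sketch with illustrative names (blocks_for is not a HOOMD function):

#include <algorithm>

// Launch-configuration arithmetic used by drivers such as mpcd::gpu::set_order: clamp the
// tuned block size, then round the grid up so that at least N threads are launched.
unsigned int blocks_for(unsigned int N, unsigned int block_size, unsigned int max_block_size)
    {
    const unsigned int run_block_size = std::min(block_size, max_block_size);
    // N / run_block_size + 1 always covers N threads; when N is an exact multiple it adds
    // one extra block whose threads exit immediately at the bounds check.
    return N / run_block_size + 1;
    }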
- */ -unsigned int sort_cell_compact(unsigned int* d_order, - const unsigned int* d_cell_list, - const unsigned int num_items, - const unsigned int N_mpcd) - { - unsigned int* last = thrust::copy_if(thrust::device, - d_cell_list, - d_cell_list + num_items, - d_order, - LessThan(N_mpcd)); - return (unsigned int)(last - d_order); - } - -/*! - * \param d_rorder Map of old particle indexes onto new particle indexes (output) - * \param d_order Map of new particle indexes onto old particle indexes + * \param d_cell_id Cell index of each MPCD particle, used as the sort key * \param N Number of particles - + * \returns cudaSuccess on completion - * - * \sa mpcd::gpu::kernel::sort_gen_reverse */ -cudaError_t sort_gen_reverse(unsigned int* d_rorder, - const unsigned int* d_order, - const unsigned int N, - const unsigned int block_size) +cudaError_t compute_order(unsigned int* d_order, unsigned int* d_cell_id, const unsigned int N) { - if (N == 0) - return cudaSuccess; - - unsigned int max_block_size; - cudaFuncAttributes attr; - cudaFuncGetAttributes(&attr, (const void*)mpcd::gpu::kernel::sort_gen_reverse); - max_block_size = attr.maxThreadsPerBlock; - - unsigned int run_block_size = min(block_size, max_block_size); - dim3 grid(N / run_block_size + 1); - mpcd::gpu::kernel::sort_gen_reverse<<>>(d_rorder, d_order, N); - + thrust::device_ptr order(d_order); + thrust::device_ptr cell_id(d_cell_id); + thrust::sort_by_key(thrust::device, cell_id, cell_id + N, order); return cudaSuccess; } } // end namespace gpu diff --git a/hoomd/mpcd/SorterGPU.cuh b/hoomd/mpcd/SorterGPU.cuh index 14d6126412..c2c2502340 100644 --- a/hoomd/mpcd/SorterGPU.cuh +++ b/hoomd/mpcd/SorterGPU.cuh @@ -31,24 +31,22 @@ cudaError_t sort_apply(Scalar4* d_pos_alt, const unsigned int N, const unsigned int block_size); -//! Kernel driver to fill empty cell list entries with sentinel -cudaError_t sort_set_sentinel(unsigned int* d_cell_list, - const unsigned int* d_cell_np, - const Index2D& cli, - const unsigned int sentinel, - const unsigned int block_size); +//! Kernel driver to fill the particle order and cell index arrays +cudaError_t set_order(unsigned int* d_order, + unsigned int* d_cell_id, + const Scalar4* d_vel, + const unsigned int N_mpcd, + const unsigned int block_size); + +//! Driver for thrust to sort the particle order by cell index +cudaError_t +compute_order(unsigned int* d_order, unsigned int* d_cell_id, const unsigned int N_mpcd); -//! Driver for thrust to perform cell-list stream compaction -unsigned int sort_cell_compact(unsigned int* d_order, - const unsigned int* d_cell_list, - const unsigned int num_items, - const unsigned int N_mpcd); -//! Kernel driver to reverse map the particle ordering -cudaError_t sort_gen_reverse(unsigned int* d_rorder, - const unsigned int* d_order, - const unsigned int N, - const unsigned int block_size); } // end namespace gpu } // end namespace mpcd } // end namespace hoomd diff --git a/hoomd/mpcd/SorterGPU.h b/hoomd/mpcd/SorterGPU.h index 566db8aeb7..e57e5106c8 100644 --- a/hoomd/mpcd/SorterGPU.h +++ b/hoomd/mpcd/SorterGPU.h @@ -32,11 +32,10 @@ class PYBIND11_EXPORT SorterGPU : public mpcd::Sorter SorterGPU(std::shared_ptr sysdef, std::shared_ptr trigger); protected: - /// Kernel tuner for filling sentinels in cell list. - std::shared_ptr> m_sentinel_tuner; + GPUArray m_cell_id; //!< Auxiliary array of cell indexes used for sorting - /// Kernel tuner for setting reverse map. - std::shared_ptr> m_reverse_tuner; + /// Kernel tuner for filling the particle order and cell index arrays. + std::shared_ptr> m_compute_order_tuner; //!< Kernel tuner for applying sorted order.
std::shared_ptr> m_apply_tuner; diff --git a/hoomd/mpcd/module.cc b/hoomd/mpcd/module.cc index 15225ca61d..1f6efac3ed 100644 --- a/hoomd/mpcd/module.cc +++ b/hoomd/mpcd/module.cc @@ -20,10 +20,8 @@ namespace mpcd */ namespace detail { -void export_ATCollisionMethod(pybind11::module&); void export_BlockForce(pybind11::module&); void export_CellList(pybind11::module&); -void export_CellThermoCompute(pybind11::module&); void export_CollisionMethod(pybind11::module&); #ifdef ENABLE_MPI void export_Communicator(pybind11::module&); @@ -51,9 +49,7 @@ void export_SRDCollisionMethod(pybind11::module&); void export_StreamingMethod(pybind11::module&); void export_VirtualParticleFiller(pybind11::module&); #ifdef ENABLE_HIP -void export_ATCollisionMethodGPU(pybind11::module&); void export_CellListGPU(pybind11::module&); -void export_CellThermoComputeGPU(pybind11::module&); #ifdef ENABLE_MPI void export_CommunicatorGPU(pybind11::module&); #endif // ENABLE_MPI @@ -199,10 +195,8 @@ PYBIND11_MODULE(_mpcd, m) export_VirtualParticleFiller(m); export_StreamingMethod(m); - export_ATCollisionMethod(m); export_BlockForce(m); export_CellList(m); - export_CellThermoCompute(m); #ifdef ENABLE_MPI export_Communicator(m); #endif // ENABLE_MPI @@ -227,9 +221,7 @@ PYBIND11_MODULE(_mpcd, m) export_SineForce(m); export_SRDCollisionMethod(m); #ifdef ENABLE_HIP - export_ATCollisionMethodGPU(m); export_CellListGPU(m); - export_CellThermoComputeGPU(m); #ifdef ENABLE_MPI export_CommunicatorGPU(m); #endif // ENABLE_MPI diff --git a/hoomd/mpcd/pytest/test_collide.py b/hoomd/mpcd/pytest/test_collide.py index 390bf829e0..554e31f9ea 100644 --- a/hoomd/mpcd/pytest/test_collide.py +++ b/hoomd/mpcd/pytest/test_collide.py @@ -31,15 +31,27 @@ def test_cell_list(small_snap, simulation_factory): assert cl.num_cells == (20, 30, 40) +# @pytest.mark.parametrize( +# "cls, init_args", +# [ +# ( +# hoomd.mpcd.collide.AndersenThermostat, +# { +# "kT": 1.0, +# }, +# ), +# ( +# hoomd.mpcd.collide.StochasticRotationDynamics, +# { +# "angle": 90, +# }, +# ), +# ], +# ids=["AndersenThermostat", "StochasticRotationDynamics"], +# ) @pytest.mark.parametrize( "cls, init_args", [ - ( - hoomd.mpcd.collide.AndersenThermostat, - { - "kT": 1.0, - }, - ), ( hoomd.mpcd.collide.StochasticRotationDynamics, { @@ -47,7 +59,7 @@ def test_cell_list(small_snap, simulation_factory): }, ), ], - ids=["AndersenThermostat", "StochasticRotationDynamics"], + ids=["StochasticRotationDynamics"], ) class TestCollisionMethod: def test_create(self, small_snap, simulation_factory, cls, init_args): diff --git a/hoomd/mpcd/test/CMakeLists.txt b/hoomd/mpcd/test/CMakeLists.txt index 7e8ee23d24..cc4bbac2f9 100644 --- a/hoomd/mpcd/test/CMakeLists.txt +++ b/hoomd/mpcd/test/CMakeLists.txt @@ -1,8 +1,6 @@ # external_field test will not link properly due to separable compilation, disabling. 
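Back in SorterGPU.cu, the new compute_order driver wraps the raw device pointers in thrust::device_ptr and sorts the index array by cell index with thrust::sort_by_key. For readers unfamiliar with key/value sorts, a host-side analogue shows the same contract (the keys end up ascending and the values are permuted along with them); this sketch is illustrative only and is not how thrust implements the device sort:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Host-side analogue of thrust::sort_by_key(cell_id, cell_id + N, order): sort the keys
// and carry the associated values along with them.
void sort_by_key(std::vector<unsigned int>& cell_id, std::vector<unsigned int>& order)
    {
    std::vector<std::pair<unsigned int, unsigned int>> kv(cell_id.size());
    for (std::size_t i = 0; i < kv.size(); ++i)
        {
        kv[i] = std::make_pair(cell_id[i], order[i]);
        }
    std::sort(kv.begin(),
              kv.end(),
              [](const std::pair<unsigned int, unsigned int>& a,
                 const std::pair<unsigned int, unsigned int>& b) { return a.first < b.first; });
    for (std::size_t i = 0; i < kv.size(); ++i)
        {
        cell_id[i] = kv[i].first;
        order[i] = kv[i].second;
        }
    }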
set(TEST_LIST - at_collision_method cell_list - cell_thermo_compute force parallel_plate_geometry_filler planar_pore_geometry_filler @@ -19,9 +17,7 @@ if(ENABLE_MPI) SET(MPI_TEST_LIST ${MPI_TEST_LIST} ${_KEY}) ENDMACRO(ADD_TO_MPI_TESTS) - ADD_TO_MPI_TESTS(cell_communicator 8) ADD_TO_MPI_TESTS(cell_list 8) - ADD_TO_MPI_TESTS(cell_thermo_compute 8) ADD_TO_MPI_TESTS(communicator 8) ADD_TO_MPI_TESTS(parallel_plate_geometry_filler 8) ADD_TO_MPI_TESTS(planar_pore_geometry_filler 8) diff --git a/hoomd/mpcd/test/cell_list_mpi_test.cc b/hoomd/mpcd/test/cell_list_mpi_test.cc index acfc01e764..a54e349246 100644 --- a/hoomd/mpcd/test/cell_list_mpi_test.cc +++ b/hoomd/mpcd/test/cell_list_mpi_test.cc @@ -817,9 +817,6 @@ void celllist_basic_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); Index3D ci = cl->getCellIndexer(); ArrayHandle h_vel(pdata->getVelocities(), access_location::host, @@ -878,9 +875,6 @@ void celllist_basic_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); Index3D ci = cl->getCellIndexer(); ArrayHandle h_vel(pdata->getVelocities(), access_location::host, @@ -938,9 +932,6 @@ void celllist_basic_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); Index3D ci = cl->getCellIndexer(); ArrayHandle h_vel(pdata->getVelocities(), access_location::host, diff --git a/hoomd/mpcd/test/cell_list_test.cc b/hoomd/mpcd/test/cell_list_test.cc index d816b9bc18..09302aff82 100644 --- a/hoomd/mpcd/test/cell_list_test.cc +++ b/hoomd/mpcd/test/cell_list_test.cc @@ -50,17 +50,6 @@ void celllist_dimension_test(std::shared_ptr exec_conf, CHECK_EQUAL_UINT(cell_indexer.getNumElements(), 6 * 8 * 10); UP_ASSERT(cl->getCellSizeArray().getNumElements() >= 6 * 8 * 10); // Each cell has one number - unsigned int Nmax = cl->getNmax(); - CHECK_EQUAL_UINT(Nmax, - 4); // Default is 4 particles per cell, ensure this happens if there's only one - - Index2D cli = cl->getCellListIndexer(); - CHECK_EQUAL_UINT(cli.getNumElements(), - 6 * 8 * 10 * Nmax); // Cell list indexer has Nmax entries per cell - - // Cell list uses amortized sizing, so must only be at least this big - UP_ASSERT(cl->getCellList().getNumElements() >= 6 * 8 * 10 * Nmax); - /*******************/ // Change the cell size, and ensure everything stays up to date cl->setGlobalDim(make_uint3(3, 4, 5)); @@ -75,13 +64,6 @@ void celllist_dimension_test(std::shared_ptr exec_conf, cell_indexer = cl->getCellIndexer(); CHECK_EQUAL_UINT(cell_indexer.getNumElements(), 3 * 4 * 5); UP_ASSERT(cl->getCellSizeArray().getNumElements() >= 3 * 4 * 5); // Each cell has one number - - cli = cl->getCellListIndexer(); - CHECK_EQUAL_UINT(cli.getNumElements(), - 3 * 4 * 5 * Nmax); // Cell list indexer has Nmax entries per cell - - // Cell list uses amortized sizing, so must only be at least this big - UP_ASSERT(cl->getCellList().getNumElements() >= 3 * 4 * 5 * Nmax); } //! 
Test for correct cell listing of a small system @@ -121,9 +103,6 @@ void celllist_small_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); // validate that each cell has the right number Index3D ci = cl->getCellIndexer(); @@ -136,19 +115,6 @@ void celllist_small_test(std::shared_ptr exec_conf, CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 0)], 1); CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 1)], 1); - // check the particle ids in each cell - Index2D cli = cl->getCellListIndexer(); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 0))], 0); - CHECK_EQUAL_UINT(h_cell_list.data[cli(1, ci(0, 0, 0))], 8); - - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 1))], 4); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 0))], 2); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 1))], 6); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 0, 0))], 1); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 0, 1))], 5); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 1, 0))], 3); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 1, 1))], 7); - ArrayHandle h_vel(pdata_9->getVelocities(), access_location::host, access_mode::read); @@ -185,9 +151,6 @@ void celllist_small_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); // validate that each cell has the right number Index3D ci = cl->getCellIndexer(); @@ -200,29 +163,6 @@ void celllist_small_test(std::shared_ptr exec_conf, CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 0, 0)], 0); CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 0, 1)], 0); CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 0)], 0); - - // check the particle ids in each cell - Index2D cli = cl->getCellListIndexer(); - { - std::vector pids(5, 0); - for (unsigned int i = 0; i < 5; ++i) - { - pids[i] = h_cell_list.data[cli(i, ci(0, 0, 0))]; - } - sort(pids.begin(), pids.end()); - unsigned int check_pids[] = {0, 2, 4, 6, 8}; - UP_ASSERT_EQUAL(pids, check_pids); - } - { - std::vector pids(4, 0); - for (unsigned int i = 0; i < 4; ++i) - { - pids[i] = h_cell_list.data[cli(i, ci(1, 1, 1))]; - } - sort(pids.begin(), pids.end()); - unsigned int check_pids[] = {1, 3, 5, 7}; - UP_ASSERT_EQUAL(pids, check_pids); - } } // bring all particles into one box, which triggers a resize, and check that all particles @@ -425,9 +365,6 @@ void celllist_embed_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); // validate that each cell has the right number Index3D ci = cl->getCellIndexer(); @@ -440,17 +377,6 @@ void celllist_embed_test(std::shared_ptr exec_conf, CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 0)], 1); CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 1)], 1); - // check the particle ids in each cell - Index2D cli = cl->getCellListIndexer(); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 0))], 0); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 1))], 4); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 0))], 2); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 1))], 6); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 0, 0))], 1); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 0, 1))], 5); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 1, 0))], 3); - 
CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 1, 1))], 7); - ArrayHandle h_vel(pdata_8->getVelocities(), access_location::host, access_mode::read); @@ -477,9 +403,6 @@ void celllist_embed_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); // validate that each cell has the right number Index3D ci = cl->getCellIndexer(); @@ -492,45 +415,6 @@ void celllist_embed_test(std::shared_ptr exec_conf, CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 0)], 2); CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 1)], 2); - // check the particle ids in each cell - Index2D cli = cl->getCellListIndexer(); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 0))], 0); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 1))], 4); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 0))], 2); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 1))], 6); - // check two particles in cell (1,0,0) - { - std::vector result(2); - result[0] = h_cell_list.data[cli(0, ci(1, 0, 0))]; - result[1] = h_cell_list.data[cli(1, ci(1, 0, 0))]; - sort(result.begin(), result.end()); - UP_ASSERT_EQUAL(result, std::vector {1, 8}); - } - // check two particles in cell (1,0,1) - { - std::vector result(2); - result[0] = h_cell_list.data[cli(0, ci(1, 0, 1))]; - result[1] = h_cell_list.data[cli(1, ci(1, 0, 1))]; - sort(result.begin(), result.end()); - UP_ASSERT_EQUAL(result, std::vector {5, 10}); - } - // check two particles in cell (1,1,0) - { - std::vector result(2); - result[0] = h_cell_list.data[cli(0, ci(1, 1, 0))]; - result[1] = h_cell_list.data[cli(1, ci(1, 1, 0))]; - sort(result.begin(), result.end()); - UP_ASSERT_EQUAL(result, std::vector {3, 9}); - } - // check two particles in cell (1,1,1) - { - std::vector result(2); - result[0] = h_cell_list.data[cli(0, ci(1, 1, 1))]; - result[1] = h_cell_list.data[cli(1, ci(1, 1, 1))]; - sort(result.begin(), result.end()); - UP_ASSERT_EQUAL(result, std::vector {7, 11}); - } - ArrayHandle h_vel(pdata_8->getVelocities(), access_location::host, access_mode::read); @@ -579,9 +463,6 @@ void celllist_embed_test(std::shared_ptr exec_conf, ArrayHandle h_cell_np(cl->getCellSizeArray(), access_location::host, access_mode::read); - ArrayHandle h_cell_list(cl->getCellList(), - access_location::host, - access_mode::read); // validate that each cell has the right number Index3D ci = cl->getCellIndexer(); @@ -594,39 +475,6 @@ void celllist_embed_test(std::shared_ptr exec_conf, CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 0)], 3); CHECK_EQUAL_UINT(h_cell_np.data[ci(1, 1, 1)], 2); - // check the particle ids in each cell - Index2D cli = cl->getCellListIndexer(); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 0))], 0); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 0, 1))], 4); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 0))], 2); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(1, 0, 0))], 1); - CHECK_EQUAL_UINT(h_cell_list.data[cli(0, ci(0, 1, 1))], 6); - // check two particles in cell (1,0,1) - { - std::vector result(2); - result[0] = h_cell_list.data[cli(0, ci(1, 0, 1))]; - result[1] = h_cell_list.data[cli(1, ci(1, 0, 1))]; - sort(result.begin(), result.end()); - UP_ASSERT_EQUAL(result, std::vector {5, 10}); - } - // check two particles in cell (1,1,0) - { - std::vector result(3); - result[0] = h_cell_list.data[cli(0, ci(1, 1, 0))]; - result[1] = h_cell_list.data[cli(1, ci(1, 1, 0))]; - result[2] = h_cell_list.data[cli(2, ci(1, 1, 0))]; - 
sort(result.begin(), result.end()); - UP_ASSERT_EQUAL(result, std::vector {3, 8, 9}); - } - // check two particles in cell (1,1,1) - { - std::vector result(2); - result[0] = h_cell_list.data[cli(0, ci(1, 1, 1))]; - result[1] = h_cell_list.data[cli(1, ci(1, 1, 1))]; - sort(result.begin(), result.end()); - UP_ASSERT_EQUAL(result, std::vector {7, 11}); - } - ArrayHandle h_vel(pdata_8->getVelocities(), access_location::host, access_mode::read); @@ -662,6 +510,347 @@ void celllist_embed_test(std::shared_ptr exec_conf, } } +//! Test for correct calculation of cell thermo properties for MPCD particles +template +void celllist_thermo_basic_test(std::shared_ptr exec_conf, + const Scalar3& L, + const Scalar3& tilt) + { + auto ref_box = std::make_shared(2.0); + auto box = std::make_shared(L); + box->setTiltFactors(tilt.x, tilt.y, tilt.z); + + std::shared_ptr> snap(new SnapshotSystemData()); + snap->global_box = box; + snap->particle_data.type_mapping.push_back("A"); + // place each particle in a different cell, doubling the first cell + snap->mpcd_data.resize(5); + snap->mpcd_data.type_mapping.push_back("A"); + snap->mpcd_data.position[0] = vec3(-0.5, -0.5, -0.5); + snap->mpcd_data.position[1] = vec3(-0.5, -0.5, -0.5); + snap->mpcd_data.position[2] = vec3(0.5, 0.5, 0.5); + snap->mpcd_data.position[3] = vec3(0.5, 0.5, 0.5); + snap->mpcd_data.position[4] = vec3(-0.5, 0.5, 0.5); + + snap->mpcd_data.velocity[0] = vec3(2.0, 0.0, 0.0); + snap->mpcd_data.velocity[1] = vec3(1.0, 0.0, 0.0); + snap->mpcd_data.velocity[2] = vec3(0.0, -3.0, 0.0); + snap->mpcd_data.velocity[3] = vec3(0.0, 0.0, -5.0); + snap->mpcd_data.velocity[4] = vec3(1.0, -1.0, 4.0); + std::shared_ptr sysdef(new SystemDefinition(snap, exec_conf)); + + std::shared_ptr pdata_5 = sysdef->getMPCDParticleData(); + + std::shared_ptr cl(new CL(sysdef, make_uint3(2, 2, 2), false)); + AllThermoRequest thermo_req(cl); + cl->compute(0); + { + const Index3D ci = cl->getCellIndexer(); + ArrayHandle h_cell_np(cl->getCellSizeArray(), + access_location::host, + access_mode::read); + ArrayHandle h_avg_vel(cl->getCellVelocities(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_energy(cl->getCellEnergies(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_temp(cl->getCellTemperature(), + access_location::host, + access_mode::read); + // Two particle cell (0,0,0) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].x, 1.5, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].y, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].z, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].w, 2.0, tol); + // energy, temperature (relative to COM), np + CHECK_CLOSE(h_cell_energy.data[ci(0, 0, 0)], 2.5, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 0, 0)], 2.0 * 0.5 * 0.5 / 3.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 0, 0)], 2); + + // Two particle cell (1,1,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].x, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].y, -1.5, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].z, -2.5, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].w, 2.0, tol); + // energy, temperature (relative to COM), np + CHECK_CLOSE(h_cell_energy.data[ci(1, 1, 1)], 17.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(1, 1, 1)], 2.0 * (1.5 * 1.5 + 2.5 * 2.5) / 3.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(1, 1, 1)], 2); + + // One particle cell (0,1,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].x, 1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].y, -1.0, 
tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].z, 4.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].w, 1.0, tol); + // Has kinetic energy, but temperature should be zero + CHECK_CLOSE(h_cell_energy.data[ci(0, 1, 1)], 9.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 1, 1)], 0.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 1, 1)], 1); + } + + // Check the net stats of the system + CHECK_CLOSE(cl->getNetMomentum().x, 4.0, tol); + CHECK_CLOSE(cl->getNetMomentum().y, -4.0, tol); + CHECK_CLOSE(cl->getNetMomentum().z, -1.0, tol); + CHECK_CLOSE(cl->getNetEnergy(), 28.5, tol); + CHECK_CLOSE(cl->getTemperature(), + 0.5 * (2.0 * 0.5 * 0.5 / 3.0 + 2.0 * (1.5 * 1.5 + 2.5 * 2.5) / 3.0), + tol); + + // increase the mass and make sure that energies depend on mass, but velocities don't + pdata_5->setMass(4.0); + cl->compute(1); + { + const Index3D ci = cl->getCellIndexer(); + ArrayHandle h_cell_np(cl->getCellSizeArray(), + access_location::host, + access_mode::read); + ArrayHandle h_avg_vel(cl->getCellVelocities(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_energy(cl->getCellEnergies(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_temp(cl->getCellTemperature(), + access_location::host, + access_mode::read); + + // Two particle cell (0,0,0) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].x, 1.5, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].y, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].z, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].w, 8.0, tol); + // energy, temperature (relative to COM), np, flag + CHECK_CLOSE(h_cell_energy.data[ci(0, 0, 0)], 4.0 * 2.5, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 0, 0)], 4.0 * 2.0 * 0.5 * 0.5 / 3.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 0, 0)], 2); + + // Two particle cell (1,1,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].x, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].y, -1.5, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].z, -2.5, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].w, 8.0, tol); + // energy, temperature (relative to COM), np, flag + CHECK_CLOSE(h_cell_energy.data[ci(1, 1, 1)], 4.0 * 17.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(1, 1, 1)], 4.0 * 2.0 * (1.5 * 1.5 + 2.5 * 2.5) / 3.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(1, 1, 1)], 2); + + // One particle cell (0,1,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].x, 1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].y, -1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].z, 4.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].w, 4.0, tol); + // Has kinetic energy, but temperature should be zero + CHECK_CLOSE(h_cell_energy.data[ci(0, 1, 1)], 4.0 * 9.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 1, 1)], 0.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 1, 1)], 1); + } + + // Check the net stats of the system + CHECK_CLOSE(cl->getNetMomentum().x, 4.0 * 4.0, tol); + CHECK_CLOSE(cl->getNetMomentum().y, 4.0 * -4.0, tol); + CHECK_CLOSE(cl->getNetMomentum().z, 4.0 * -1.0, tol); + CHECK_CLOSE(cl->getNetEnergy(), 4.0 * 28.5, tol); + CHECK_CLOSE(cl->getTemperature(), + 4.0 * 0.5 * (2.0 * 0.5 * 0.5 / 3.0 + 2.0 * (1.5 * 1.5 + 2.5 * 2.5) / 3.0), + tol); + + // switch a particle into a different cell, and make sure the DOF are reduced accordingly + pdata_5->setMass(1.0); + { + ArrayHandle h_pos(pdata_5->getPositions(), + access_location::host, + access_mode::readwrite); + h_pos.data[2] = make_scalar4(-0.5, -0.5, -0.5, 0.0); + } + cl->compute(2); + { + const 
Index3D ci = cl->getCellIndexer(); + ArrayHandle h_cell_np(cl->getCellSizeArray(), + access_location::host, + access_mode::read); + ArrayHandle h_avg_vel(cl->getCellVelocities(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_energy(cl->getCellEnergies(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_temp(cl->getCellTemperature(), + access_location::host, + access_mode::read); + + // Three particle cell (0,0,0) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].x, 1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].y, -1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].z, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].w, 3.0, tol); + // energy, temperature (relative to COM), np, flag + CHECK_CLOSE(h_cell_energy.data[ci(0, 0, 0)], 7.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 0, 0)], + (2 * 1.0 * 1.0 + 2 * 1.0 * 1.0 + 2.0 * 2.0) / 6.0, + tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 0, 0)], 3); + + // One particle cell (1,1,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].x, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].y, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].z, -5.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].w, 1.0, tol); + // energy, temperature (relative to COM), np, flag + CHECK_CLOSE(h_cell_energy.data[ci(1, 1, 1)], 12.5, tol); + CHECK_CLOSE(h_cell_temp.data[ci(1, 1, 1)], 0.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(1, 1, 1)], 1); + + // One particle cell (0,1,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].x, 1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].y, -1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].z, 4.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 1, 1)].w, 1.0, tol); + // Has kinetic energy, but temperature should be zero + CHECK_CLOSE(h_cell_energy.data[ci(0, 1, 1)], 9.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 1, 1)], 0.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 1, 1)], 1); + } + + // Check the net stats of the system, only average temperature should change now + CHECK_CLOSE(cl->getNetMomentum().x, 4.0, tol); + CHECK_CLOSE(cl->getNetMomentum().y, -4.0, tol); + CHECK_CLOSE(cl->getNetMomentum().z, -1.0, tol); + CHECK_CLOSE(cl->getNetEnergy(), 28.5, tol); + CHECK_CLOSE(cl->getTemperature(), (2 * 1.0 * 1.0 + 2 * 1.0 * 1.0 + 2.0 * 2.0) / 6.0, tol); + } + +//! 
Test for correct calculation of cell thermo properties with embedded particles +template +void celllist_thermo_embed_test(std::shared_ptr exec_conf, + const Scalar3& L, + const Scalar3& tilt) + { + auto ref_box = std::make_shared(2.0); + auto box = std::make_shared(L); + box->setTiltFactors(tilt.x, tilt.y, tilt.z); + + std::shared_ptr> snap(new SnapshotSystemData()); + snap->global_box = box; + { + SnapshotParticleData& pdata_snap = snap->particle_data; + pdata_snap.type_mapping.push_back("A"); + pdata_snap.resize(4); + + pdata_snap.pos[0] = vec3(-0.5, -0.5, -0.5); + pdata_snap.pos[1] = vec3(-0.5, -0.5, 0.5); + pdata_snap.pos[2] = vec3(0.5, -0.5, 0.5); + pdata_snap.pos[3] = vec3(0.5, 0.5, 0.5); + + pdata_snap.mass[0] = 3.0; + pdata_snap.mass[1] = 2.0; + pdata_snap.mass[2] = 4.0; + pdata_snap.mass[3] = 5.0; + + pdata_snap.vel[0] = vec3(-2.0, 0.0, 0.0); + pdata_snap.vel[1] = vec3(1.0, 0.0, 0.0); + pdata_snap.vel[2] = vec3(0.0, -3.0, 0.0); + pdata_snap.vel[3] = vec3(0.0, 0.0, -5.0); + } + snap->mpcd_data.resize(4); + snap->mpcd_data.type_mapping.push_back("A"); + snap->mpcd_data.position[0] = vec3(-0.5, -0.5, -0.5); + snap->mpcd_data.position[1] = vec3(-0.5, -0.5, 0.5); + snap->mpcd_data.position[2] = vec3(0.5, -0.5, 0.5); + snap->mpcd_data.position[3] = vec3(0.5, 0.5, 0.5); + + snap->mpcd_data.velocity[0] = vec3(2.0, 0.0, 0.0); + snap->mpcd_data.velocity[1] = vec3(1.0, 0.0, 0.0); + snap->mpcd_data.velocity[2] = vec3(0.0, -3.0, 0.0); + snap->mpcd_data.velocity[3] = vec3(0.0, 0.0, -5.0); + std::shared_ptr sysdef(new SystemDefinition(snap, exec_conf)); + + std::shared_ptr embed_pdata = sysdef->getParticleData(); + std::shared_ptr selector(new ParticleFilterAll()); + std::shared_ptr group(new ParticleGroup(sysdef, selector)); + + auto cl = std::make_shared(sysdef, 1.0, false); + cl->setEmbeddedGroup(group); + AllThermoRequest thermo_req(cl); + cl->compute(0); + { + const Index3D ci = cl->getCellIndexer(); + ArrayHandle h_cell_np(cl->getCellSizeArray(), + access_location::host, + access_mode::read); + ArrayHandle h_avg_vel(cl->getCellVelocities(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_energy(cl->getCellEnergies(), + access_location::host, + access_mode::read); + ArrayHandle h_cell_temp(cl->getCellTemperature(), + access_location::host, + access_mode::read); + + // Cell (0,0,0) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].x, -1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].y, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].z, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 0)].w, 4.0, tol); + // energy, temperature (relative to COM), np, flag + CHECK_CLOSE(h_cell_energy.data[ci(0, 0, 0)], 8.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 0, 0)], 4.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 0, 0)], 2); + + // Cell (0,0,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 1)].x, 1.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 1)].y, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 1)].z, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(0, 0, 1)].w, 3.0, tol); + // energy, temperature (relative to COM), np, flag + CHECK_CLOSE(h_cell_energy.data[ci(0, 0, 1)], 1.5, tol); + CHECK_CLOSE(h_cell_temp.data[ci(0, 0, 1)], 0.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(0, 0, 1)], 2); + + // Cell (1,0,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(1, 0, 1)].x, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 0, 1)].y, -3.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 0, 1)].z, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 
0, 1)].w, 5.0, tol); + // Has kinetic energy, but temperature should be zero + CHECK_CLOSE(h_cell_energy.data[ci(1, 0, 1)], 22.5, tol); + CHECK_CLOSE(h_cell_temp.data[ci(1, 0, 1)], 0.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(1, 0, 1)], 2); + + // Cell (1,1,1) + // Average velocity, mass + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].x, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].y, 0.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].z, -5.0, tol); + CHECK_CLOSE(h_avg_vel.data[ci(1, 1, 1)].w, 6.0, tol); + // energy, temperature (relative to COM), np, flag + CHECK_CLOSE(h_cell_energy.data[ci(1, 1, 1)], 75.0, tol); + CHECK_CLOSE(h_cell_temp.data[ci(1, 1, 1)], 0.0, tol); + UP_ASSERT_EQUAL(h_cell_np.data[ci(1, 1, 1)], 2); + } + + // Check the net stats of the system + CHECK_CLOSE(cl->getNetMomentum().x, -1.0, tol); + CHECK_CLOSE(cl->getNetMomentum().y, -15.0, tol); + CHECK_CLOSE(cl->getNetMomentum().z, -30.0, tol); + CHECK_CLOSE(cl->getNetEnergy(), 107.0, tol); + CHECK_CLOSE(cl->getTemperature(), (4.0 + 0.0 + 0.0 + 0.0) / 4., tol); + } + //! dimension test case for MPCD CellList class UP_TEST(mpcd_cell_list_dimensions) { @@ -770,6 +959,22 @@ UP_TEST(mpcd_cell_list_embed_test_triclinic) make_scalar3(0.5, -0.75, 1.0)); } +//! test case for cell list velocity and energy +UP_TEST(mpcd_cell_list_thermo_basic) + { + celllist_thermo_basic_test( + std::make_shared(ExecutionConfiguration::CPU), + make_scalar3(2.0, 2.0, 2.0), + make_scalar3(0, 0, 0)); + } +UP_TEST(mpcd_cell_list_thermo_embed) + { + celllist_thermo_embed_test( + std::make_shared(ExecutionConfiguration::CPU), + make_scalar3(2.0, 2.0, 2.0), + make_scalar3(0, 0, 0)); + } + #ifdef ENABLE_HIP //! dimension test case for MPCD CellListGPU class UP_TEST(mpcd_cell_list_gpu_dimensions) @@ -878,4 +1083,20 @@ UP_TEST(mpcd_cell_list_gpu_embed_test_triclinic) make_scalar3(2.0, 2.0, 2.0), make_scalar3(0.5, -0.75, 1.0)); } + +//! 
test case for cell list velocity and energy +UP_TEST(mpcd_cell_list_thermo_basic_gpu) + { + celllist_thermo_basic_test( + std::make_shared(ExecutionConfiguration::GPU), + make_scalar3(2.0, 2.0, 2.0), + make_scalar3(0, 0, 0)); + } +UP_TEST(mpcd_cell_list_thermo_embed_gpu) + { + celllist_thermo_embed_test( + std::make_shared(ExecutionConfiguration::GPU), + make_scalar3(2.0, 2.0, 2.0), + make_scalar3(0, 0, 0)); + } #endif // ENABLE_HIP diff --git a/hoomd/mpcd/test/sorter_test.cc b/hoomd/mpcd/test/sorter_test.cc index b055f31966..abd045afdd 100644 --- a/hoomd/mpcd/test/sorter_test.cc +++ b/hoomd/mpcd/test/sorter_test.cc @@ -168,40 +168,6 @@ template void sorter_test(std::shared_ptr exec_ UP_ASSERT_EQUAL(__scalar_as_int(h_vel.data[6].w), 6); UP_ASSERT_EQUAL(__scalar_as_int(h_vel.data[7].w), 7); } - - // check that the cell list has been updated as well - { - ArrayHandle h_cl(cl->getCellList(), access_location::host, access_mode::read); - ArrayHandle h_np(cl->getCellSizeArray(), - access_location::host, - access_mode::read); - const Index3D& ci = cl->getCellIndexer(); - const Index2D& cli = cl->getCellListIndexer(); - - // all cells should have one particle, except the first cell, which has the embedded one - UP_ASSERT_EQUAL(h_np.data[ci(0, 0, 0)], 2); - UP_ASSERT_EQUAL(h_np.data[ci(1, 0, 0)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(0, 1, 0)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(1, 1, 0)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(0, 0, 1)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(1, 0, 1)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(0, 1, 1)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(1, 1, 1)], 1); - - // the particles should be in ascending order - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 0, 0))], 1); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(0, 1, 0))], 2); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 1, 0))], 3); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(0, 0, 1))], 4); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 0, 1))], 5); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(0, 1, 1))], 6); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 1, 1))], 7); - // do first cell separately, since it needs to be a sorted list - std::vector cell_0 - = {h_cl.data[cli(0, ci(0, 0, 0))], h_cl.data[cli(1, ci(0, 0, 0))]}; - std::sort(cell_0.begin(), cell_0.end()); - UP_ASSERT_EQUAL(cell_0, std::vector {0, 8}); - } } //! 
Test for MPCD sorting with virtual particles @@ -370,36 +336,6 @@ template void sorter_virtual_test(std::shared_ptr h_cl(cl->getCellList(), access_location::host, access_mode::read); - ArrayHandle h_np(cl->getCellSizeArray(), - access_location::host, - access_mode::read); - const Index3D& ci = cl->getCellIndexer(); - const Index2D& cli = cl->getCellListIndexer(); - - // all cells should have one particle - UP_ASSERT_EQUAL(h_np.data[ci(0, 0, 0)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(1, 0, 0)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(0, 1, 0)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(1, 1, 0)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(0, 0, 1)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(1, 0, 1)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(0, 1, 1)], 1); - UP_ASSERT_EQUAL(h_np.data[ci(1, 1, 1)], 1); - - // the particles should be in ascending order, with VPs interleaved unsorted - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(0, 0, 0))], 0); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 0, 0))], 6); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(0, 1, 0))], 1); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 1, 0))], 7); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(0, 0, 1))], 2); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 0, 1))], 3); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(0, 1, 1))], 4); - UP_ASSERT_EQUAL(h_cl.data[cli(0, ci(1, 1, 1))], 5); - } } //! basic test case for MPCD sorter diff --git a/hoomd/mpcd/test/srd_collision_method_test.cc b/hoomd/mpcd/test/srd_collision_method_test.cc index 943b81b27d..713c040098 100644 --- a/hoomd/mpcd/test/srd_collision_method_test.cc +++ b/hoomd/mpcd/test/srd_collision_method_test.cc @@ -56,10 +56,7 @@ void srd_collision_method_basic_test(std::shared_ptr exe auto cl = std::make_shared(sysdef, 1.0, false); std::shared_ptr collide = std::make_shared(sysdef, 0, 2, 1, 130.); collide->setCellList(cl); - - // create a thermo, and use it to check the current - auto thermo = std::make_shared(sysdef, cl); - AllThermoRequest thermo_req(thermo); + AllThermoRequest thermo_req(cl); UP_ASSERT(!collide->peekCollide(0)); collide->collide(0); @@ -77,16 +74,16 @@ void srd_collision_method_basic_test(std::shared_ptr exe { // check net properties of cells, which should match our inputs - thermo->compute(0); - const Scalar3 mom = thermo->getNetMomentum(); + cl->compute(0); + const Scalar3 mom = cl->getNetMomentum(); CHECK_CLOSE(mom.x, orig_mom.x, tol_small); CHECK_CLOSE(mom.y, orig_mom.y, tol_small); CHECK_CLOSE(mom.z, orig_mom.z, tol_small); - const Scalar energy = thermo->getNetEnergy(); + const Scalar energy = cl->getNetEnergy(); CHECK_CLOSE(energy, orig_energy, tol_small); - const Scalar temp = thermo->getTemperature(); + const Scalar temp = cl->getTemperature(); CHECK_CLOSE(temp, orig_temp, tol_small); } @@ -151,16 +148,16 @@ void srd_collision_method_basic_test(std::shared_ptr exe } // recompute net properties, and make sure they are still the same - thermo->compute(2); - const Scalar3 mom = thermo->getNetMomentum(); + cl->compute(2); + const Scalar3 mom = cl->getNetMomentum(); CHECK_CLOSE(mom.x, orig_mom.x, tol_small); CHECK_CLOSE(mom.y, orig_mom.y, tol_small); CHECK_CLOSE(mom.z, orig_mom.z, tol_small); - const Scalar energy = thermo->getNetEnergy(); + const Scalar energy = cl->getNetEnergy(); CHECK_CLOSE(energy, orig_energy, tol_small); - const Scalar temp = thermo->getTemperature(); + const Scalar temp = cl->getTemperature(); CHECK_CLOSE(temp, orig_temp, tol_small); } @@ -284,10 +281,7 @@ void srd_collision_method_embed_test(std::shared_ptr exe std::shared_ptr collide = std::make_shared(sysdef, 0, 1, -1, 130.); collide->setCellList(cl); 
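For reference, the expected values asserted in the new celllist_thermo_basic_test and celllist_thermo_embed_test above are consistent with per-cell definitions of a mass-weighted mean velocity, a total kinetic energy, and a temperature formed from the kinetic energy relative to the cell's center-of-mass motion with 3(N-1) degrees of freedom. Worked out in LaTeX for cell (0,0,0) in each test.

Basic test, two unit-mass particles with velocities (2,0,0) and (1,0,0):
\[
\mathbf{v}_{\mathrm{cm}} = \tfrac{1}{2}\big[(2,0,0)+(1,0,0)\big] = (1.5,0,0), \qquad
E = \tfrac{1}{2}(2^2) + \tfrac{1}{2}(1^2) = 2.5,
\]
\[
K_{\mathrm{rel}} = \tfrac{1}{2}(0.5)^2 + \tfrac{1}{2}(0.5)^2 = 0.25, \qquad
T = \frac{2K_{\mathrm{rel}}}{3(N-1)} = \frac{0.5}{3} \approx 0.167,
\]
matching the CHECK_CLOSE targets 1.5, mass 2.0, energy 2.5, and temperature 2.0 * 0.5 * 0.5 / 3.0.

Embed test, an MPCD particle of mass 1 with velocity (2,0,0) plus an embedded particle of mass 3 with velocity (-2,0,0):
\[
\mathbf{v}_{\mathrm{cm}} = \frac{(2,0,0) + 3\,(-2,0,0)}{4} = (-1,0,0), \qquad
E = \tfrac{1}{2}(2^2) + \tfrac{3}{2}(2^2) = 8,
\]
\[
K_{\mathrm{rel}} = \tfrac{1}{2}(3^2) + \tfrac{3}{2}(1^2) = 6, \qquad
T = \frac{2K_{\mathrm{rel}}}{3(N-1)} = 4,
\]
matching the CHECK_CLOSE targets -1.0, mass 4.0, energy 8.0, and temperature 4.0.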
- - // create a thermo, and use it to check the current - auto thermo = std::make_shared(sysdef, cl); - AllThermoRequest thermo_req(thermo); + AllThermoRequest thermo_req(cl); // embed the particle group into the mpcd system std::shared_ptr selector_one(new ParticleFilterAll()); @@ -295,10 +289,10 @@ void srd_collision_method_embed_test(std::shared_ptr exe collide->setEmbeddedGroup(group_all); // Save original momentum for comparison as well - thermo->compute(0); - const Scalar orig_energy = thermo->getNetEnergy(); - const Scalar orig_temp = thermo->getTemperature(); - const Scalar3 orig_mom = thermo->getNetMomentum(); + cl->compute(0); + const Scalar orig_energy = cl->getNetEnergy(); + const Scalar orig_temp = cl->getTemperature(); + const Scalar3 orig_mom = cl->getNetMomentum(); collide->collide(0); { // velocity should be different now, but the mass should stay the same @@ -312,10 +306,10 @@ void srd_collision_method_embed_test(std::shared_ptr exe } // compute properties after rotation - thermo->compute(1); - Scalar energy = thermo->getNetEnergy(); - Scalar temp = thermo->getTemperature(); - Scalar3 mom = thermo->getNetMomentum(); + cl->compute(1); + Scalar energy = cl->getNetEnergy(); + Scalar temp = cl->getTemperature(); + Scalar3 mom = cl->getNetMomentum(); // energy (temperature) and momentum should be conserved after a collision CHECK_CLOSE(orig_energy, energy, tol_small); @@ -337,8 +331,7 @@ void srd_collision_method_thermostat_test(std::shared_ptr(sysdef, 1.0, false); std::shared_ptr collide = std::make_shared(sysdef, 0, 1, -1, 827); collide->setCellList(cl); - auto thermo = std::make_shared(sysdef, cl); - AllThermoRequest thermo_req(thermo); + AllThermoRequest thermo_req(cl); // timestep counter and number of samples to make uint64_t timestep = 0; @@ -351,8 +344,8 @@ void srd_collision_method_thermostat_test(std::shared_ptrcompute(timestep); - mean += thermo->getTemperature(); + cl->compute(timestep); + mean += cl->getTemperature(); collide->collide(timestep++); } mean /= N; @@ -366,8 +359,8 @@ void srd_collision_method_thermostat_test(std::shared_ptrcompute(timestep); - mean += thermo->getTemperature(); + cl->compute(timestep); + mean += cl->getTemperature(); collide->collide(timestep++); } mean /= N; diff --git a/hoomd/mpcd/test/utils.h b/hoomd/mpcd/test/utils.h index 0ceda735a3..a19b8c0ec8 100644 --- a/hoomd/mpcd/test/utils.h +++ b/hoomd/mpcd/test/utils.h @@ -5,7 +5,6 @@ #define MPCD_TEST_UTILS_H_ #include "hoomd/mpcd/CellList.h" -#include "hoomd/mpcd/CellThermoCompute.h" namespace hoomd { @@ -15,15 +14,14 @@ class AllThermoRequest public: //! Constructor /*! - * \param thermo Thermo compute to supply flags to. + * \param cl CellList to supply flags to. * - * \post This object is connected to \a thermo. + * \post This object is connected to \a cl. */ - AllThermoRequest(std::shared_ptr thermo) : m_thermo(thermo) + AllThermoRequest(std::shared_ptr cl) : m_cl(cl) { - if (m_thermo) - m_thermo->getFlagsSignal().connect( - this); + if (m_cl) + m_cl->getFlagsSignal().connect(this); } //! Destructor @@ -32,8 +30,8 @@ class AllThermoRequest */ ~AllThermoRequest() { - if (m_thermo) - m_thermo->getFlagsSignal().disconnect( + if (m_cl) + m_cl->getFlagsSignal().disconnect( this); } @@ -48,7 +46,7 @@ class AllThermoRequest } private: - std::shared_ptr m_thermo; + std::shared_ptr m_cl; }; Scalar3 scale(const Scalar3& ref_pos, std::shared_ptr ref_box, std::shared_ptr box)
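Across the updated SRD tests and utils.h, the net thermo quantities that were previously queried from mpcd::CellThermoCompute are now requested from and read off the cell list itself. A condensed usage sketch based on the calls exercised above (an illustrative fragment, not a complete test; includes and the SystemDefinition setup are omitted):

// Condensed from the calls exercised in the updated tests; sysdef is assumed to exist.
void check_net_thermo(std::shared_ptr<hoomd::SystemDefinition> sysdef, uint64_t timestep)
    {
    using namespace hoomd;
    auto cl = std::make_shared<mpcd::CellList>(sysdef, 1.0, false);
    AllThermoRequest thermo_req(cl); // connect to the cell list's flags signal (utils.h above)

    cl->compute(timestep); // fills the per-cell and net thermo data
    const Scalar3 momentum = cl->getNetMomentum();
    const Scalar energy = cl->getNetEnergy();
    const Scalar temperature = cl->getTemperature();
    // ... compare momentum, energy, and temperature against references with CHECK_CLOSE ...
    }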