From 233016e2dca343810312c0aaa294270e9f6b8df4 Mon Sep 17 00:00:00 2001
From: Arseniy Obolenskiy
Date: Tue, 24 Sep 2024 21:29:31 +0300
Subject: [PATCH] Add information about collective operations to 03-mpi-api

---
 03-mpi-api/03-mpi-api.tex | 99 +++++++++++++++++++++++++++++++++++++--
 03-mpi-api/03-mpi-api.toc |  3 --
 2 files changed, 95 insertions(+), 7 deletions(-)
 delete mode 100644 03-mpi-api/03-mpi-api.toc

diff --git a/03-mpi-api/03-mpi-api.tex b/03-mpi-api/03-mpi-api.tex
index a47f17b..a9ce1c7 100644
--- a/03-mpi-api/03-mpi-api.tex
+++ b/03-mpi-api/03-mpi-api.tex
@@ -97,6 +97,7 @@ \section{Advanced Send/Receive API}
 
 \begin{frame}{\texttt{MPI\_Irecv}}
     Non-Blocking Receive function. Initiates a receive operation that returns immediately.
+
     \texttt{int MPI\_Irecv(void *buf, int count, MPI\_Datatype datatype, int source, int tag, MPI\_Comm comm, MPI\_Request *request);}
 
     Parameters:
@@ -150,27 +151,113 @@ \section{Synchronization}
 \section{Collective operations}
 
 \begin{frame}{Collective operations}
+    Operations that involve all processes within a communicator.
+
+    Characteristics:
+    \begin{itemize}
+        \item Implicit synchronization among processes.
+        \item Cannot be restricted to a subset of processes unless a new communicator is created.
+    \end{itemize}
+
+    Examples:
+    \begin{itemize}
+        \item Data movement operations (e.g., \texttt{MPI\_Bcast}, \texttt{MPI\_Gather}).
+        \item Reduction operations (e.g., \texttt{MPI\_Reduce}, \texttt{MPI\_Allreduce}).
+    \end{itemize}
+
+    Benefits (why use them instead of send/recv?):
+    \begin{itemize}
+        \item Optimized for the underlying hardware and for common communication patterns.
+        \item Simpler code and improved readability.
+    \end{itemize}
+\end{frame}
+
+\begin{frame}{Broadcast (\texttt{MPI\_Bcast})}
+    Sends data from one process (the root) to all other processes in the communicator.
+
+    \texttt{int MPI\_Bcast(void *buffer, int count, MPI\_Datatype datatype, int root, MPI\_Comm comm);}
+
+    Parameters:
+    \begin{itemize}
+        \item buffer: Starting address of buffer.
+        \item count: Number of entries in buffer.
+        \item datatype: Data type of buffer elements.
+        \item root: Rank of broadcast root.
+        \item comm: Communicator.
+    \end{itemize}
 \end{frame}
 
-\begin{frame}{Broadcast}
+\begin{frame}{Reduction (\texttt{MPI\_Reduce})}
+    Performs a global reduction operation (e.g., sum, max) across all processes, for example computing the total sum of values distributed across the processes.
+
+    It can be seen as the opposite of a broadcast.
+
+    \texttt{int MPI\_Reduce(const void *sendbuf, void *recvbuf, int count, MPI\_Datatype datatype, MPI\_Op op, int root, MPI\_Comm comm);}
+
+    Commonly used operations:
+    \begin{itemize}
+        \item \texttt{MPI\_SUM}
+        \item \texttt{MPI\_PROD}
+        \item \texttt{MPI\_MAX}
+        \item \texttt{MPI\_MIN}
+    \end{itemize}
 \end{frame}
 
 \begin{frame}{\texttt{MPI\_Gather}}
+    Collects data from all processes at a single root process.
+
+    \texttt{int MPI\_Gather(const void *sendbuf, int sendcount, MPI\_Datatype sendtype, void *recvbuf, int recvcount, MPI\_Datatype recvtype, int root, MPI\_Comm comm);}
+
+    Parameters:
+    \begin{itemize}
+        \item sendbuf: Starting address of send buffer.
+        \item recvbuf: Starting address of receive buffer (significant only at root).
+    \end{itemize}
 \end{frame}
 
 \begin{frame}{\texttt{MPI\_Scatter}}
+    Distributes distinct chunks of data from the root to all processes.
+
+    \texttt{int MPI\_Scatter(const void *sendbuf, int sendcount, MPI\_Datatype sendtype, void *recvbuf, int recvcount, MPI\_Datatype recvtype, int root, MPI\_Comm comm);}
+
+    Parameters:
+    \begin{itemize}
+        \item \texttt{sendbuf}: Starting address of send buffer (significant only at root).
+        \item \texttt{recvbuf}: Starting address of receive buffer.
+    \end{itemize}
 \end{frame}
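+
+% Usage sketch: the root scatters equal chunks of an array, each process sums its
+% own chunk, and MPI_Reduce combines the partial sums at the root. This slide is an
+% illustrative example only; CHUNK, the buffer names, and the data values are
+% assumptions, and it relies on a [fragile] frame so plain verbatim can be used.
+\begin{frame}[fragile]{Example: \texttt{MPI\_Scatter} + \texttt{MPI\_Reduce}}
+\begin{scriptsize}
+\begin{verbatim}
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define CHUNK 4  /* elements per process (illustrative) */
+
+int main(int argc, char **argv) {
+    MPI_Init(&argc, &argv);
+    int rank, size;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    int *data = NULL;  /* send buffer, significant only at root */
+    if (rank == 0) {
+        data = malloc(CHUNK * size * sizeof(int));
+        for (int i = 0; i < CHUNK * size; i++) data[i] = i;
+    }
+    int local[CHUNK];  /* receive buffer on every process */
+    MPI_Scatter(data, CHUNK, MPI_INT, local, CHUNK, MPI_INT,
+                0, MPI_COMM_WORLD);
+
+    int local_sum = 0, total = 0;
+    for (int i = 0; i < CHUNK; i++) local_sum += local[i];
+    MPI_Reduce(&local_sum, &total, 1, MPI_INT, MPI_SUM,
+               0, MPI_COMM_WORLD);
+    if (rank == 0) printf("Total sum: %d\n", total);  /* sum of 0..CHUNK*size-1 */
+    free(data);
+    MPI_Finalize();
+    return 0;
+}
+\end{verbatim}
+\end{scriptsize}
+\end{frame}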
 
 \begin{frame}{\texttt{MPI\_AllGather}}
-\end{frame}
+    Gathers data from all processes and distributes the combined data to all processes.
+
+    \texttt{int MPI\_Allgather(const void *sendbuf, int sendcount, MPI\_Datatype sendtype, void *recvbuf, int recvcount, MPI\_Datatype recvtype, MPI\_Comm comm);}
 
-\begin{frame}{All-to-All}
+    Using this function avoids the need for separate gather and broadcast operations.
 \end{frame}
 
-\begin{frame}{Reduction}
+\begin{frame}{All-to-All (\texttt{MPI\_Alltoall})}
+    Each process sends data to and receives data from all other processes. It can be seen as transposing a matrix distributed across the processes.
+
+    \texttt{int MPI\_Alltoall(const void *sendbuf, int sendcount, MPI\_Datatype sendtype, void *recvbuf, int recvcount, MPI\_Datatype recvtype, MPI\_Comm comm);}
+
+    Note: This operation is communication-intensive.
 \end{frame}
 
 \begin{frame}{All API have not blocking versions}
+    Non-blocking collective operations allow overlapping communication with computation.
+
+    Examples:
+    \begin{itemize}
+        \item \texttt{MPI\_Ibcast}: Non-blocking broadcast.
+        \item \texttt{MPI\_Ireduce}: Non-blocking reduction.
+        \item \texttt{MPI\_Iallgather}: Non-blocking all-gather.
+    \end{itemize}
+
+    \texttt{int MPI\_Ibcast(void *buffer, int count, MPI\_Datatype datatype, int root, MPI\_Comm comm, MPI\_Request *request);}
+
+    \texttt{int MPI\_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI\_Datatype datatype, MPI\_Op op, int root, MPI\_Comm comm, MPI\_Request *request);}
+
+    The usage flow is the same as for \texttt{MPI\_Isend}/\texttt{MPI\_Irecv}: initiate the operation and later complete it with \texttt{MPI\_Wait} or \texttt{MPI\_Test}.
 \end{frame}
 
 \begin{frame}
@@ -179,6 +266,10 @@ \section{Collective operations}
 \end{frame}
 
 \begin{frame}{References}
+    \begin{enumerate}
+        \item MPI Standard: \href{https://www.mpi-forum.org/docs/}{https://www.mpi-forum.org/docs/}
+        \item Open MPI v4.0.7 documentation: \href{https://www.open-mpi.org/doc/v4.0/}{https://www.open-mpi.org/doc/v4.0/}
+    \end{enumerate}
 \end{frame}
 
 \end{document}
diff --git a/03-mpi-api/03-mpi-api.toc b/03-mpi-api/03-mpi-api.toc
deleted file mode 100644
index 597ca8b..0000000
--- a/03-mpi-api/03-mpi-api.toc
+++ /dev/null
@@ -1,3 +0,0 @@
-\beamer@sectionintoc {1}{Advanced Send/Receive API}{3}{0}{1}
-\beamer@sectionintoc {2}{Synchronization}{6}{0}{2}
-\beamer@sectionintoc {3}{Collective operations}{8}{0}{3}