Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gloo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ list(APPEND GLOO_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/allgatherv.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/allreduce.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/allreduce_local.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/allreduce_shm.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/alltoall.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/alltoallv.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/barrier.cc"
Expand All @@ -34,6 +35,7 @@ list(APPEND GLOO_HDRS
"${CMAKE_CURRENT_SOURCE_DIR}/allreduce_local.h"
"${CMAKE_CURRENT_SOURCE_DIR}/allreduce_ring.h"
"${CMAKE_CURRENT_SOURCE_DIR}/allreduce_ring_chunked.h"
"${CMAKE_CURRENT_SOURCE_DIR}/allreduce_shm.h"
"${CMAKE_CURRENT_SOURCE_DIR}/alltoall.h"
"${CMAKE_CURRENT_SOURCE_DIR}/alltoallv.h"
"${CMAKE_CURRENT_SOURCE_DIR}/barrier.h"
Expand Down
13 changes: 12 additions & 1 deletion gloo/allreduce.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "gloo/common/logging.h"
#include "gloo/math.h"
#include "gloo/types.h"
#include "gloo/allreduce_shm.h"

namespace gloo {

Expand Down Expand Up @@ -131,14 +132,23 @@ void allreduce(const detail::AllreduceOptionsImpl& opts) {
return;
}

switch (opts.algorithm) {
auto algorithm = opts.algorithm;
if (is_intra_node(opts)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing a broadcast prior to every single allreduce seems pretty expensive -- can we move this logic to the TCP init in createAndConnectAllPairs? We have enough information there I believe to compute the topology since we have all the hostnames.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other option would be to inspect the pairs that are participating and check if they all share the same IP

Copy link
Author

@gaopengff gaopengff Sep 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for you advice. I have moved intra-node checking to createAndConnectAllPairs and it would use hostnames for checking. Could you help review? Thanks.

algorithm = detail::AllreduceOptionsImpl::SHM;
}


switch (algorithm) {
case detail::AllreduceOptionsImpl::UNSPECIFIED:
case detail::AllreduceOptionsImpl::RING:
ring(opts, reduceInputs, broadcastOutputs);
break;
case detail::AllreduceOptionsImpl::BCUBE:
bcube(opts, reduceInputs, broadcastOutputs);
break;
case detail::AllreduceOptionsImpl::SHM:
shm(opts);
break;
default:
GLOO_ENFORCE(false, "Algorithm not handled.");
}
Expand All @@ -153,6 +163,7 @@ void ring(
const auto slot = Slot::build(kAllreduceSlotPrefix, opts.tag);
const size_t totalBytes = opts.elements * opts.elementSize;


// Note: context->size > 1
const auto recvRank = (context->size + context->rank + 1) % context->size;
const auto sendRank = (context->size + context->rank - 1) % context->size;
Expand Down
1 change: 1 addition & 0 deletions gloo/allreduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ struct AllreduceOptionsImpl {
UNSPECIFIED = 0,
RING = 1,
BCUBE = 2,
SHM = 3,
};

explicit AllreduceOptionsImpl(const std::shared_ptr<Context>& context)
Expand Down
Loading
Loading