Skip to content

Commit 180c901

Browse files
committed
[B200] OSU: fix baseline for mbw_mr on p6-b200.48xlarge.
1 parent 01318e1 commit 180c901

File tree

1 file changed

+7
-2
lines changed
  • tests/integration-tests/tests/performance_tests

1 file changed

+7
-2
lines changed

tests/integration-tests/tests/performance_tests/test_osu.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ def _test_osu_benchmarks_multiple_bandwidth(
239239
):
240240
instance_bandwidth_dict = {
241241
# Bandwidth is expressed here in MBps.
242+
# Baseline does not reflect the theoretical max bandwidth declared by EC2 because
243+
# the way we run OSU with OpenMPI does not leverage the full bandwidth.
244+
# As long as we do not fix the way we run OSU, this baseline reflects values
245+
# that we consider acceptable given this limitation.
246+
#
242247
# Expected bandwidth for p4d and p4de (4 * 100 Gbps NICS -> declared NetworkPerformance 400 Gbps):
243248
# OMPI 4.1.0: ~330Gbps = 41250MB/s with Placement Group
244249
# OMPI 4.1.0: ~252Gbps = 31550MB/s without Placement Group
@@ -252,8 +257,8 @@ def _test_osu_benchmarks_multiple_bandwidth(
252257
"trn1.32xlarge": 80000, # Equivalent to a theoretical maximum of a single 640Gbps card
253258
# 32 100 Gbps NICS -> declared NetworkPerformance 3200 Gbps = 400000MBps (80% is 320000MBps)
254259
"p5en.48xlarge": 320000,
255-
# 8 200 Gbps NICS -> declared NetworkPerformance 1600 Gbps = 200000MBps (80% is 160000MBps)
256-
"p6-b200.48xlarge": 160000,
260+
# 8 400 Gbps NICS -> declared NetworkPerformance 3200 Gbps = 400000MBps (acceptable is 58% = 232000MBps)
261+
"p6-b200.48xlarge": 232000,
257262
}
258263
num_instances = 2
259264
mpi_version = "openmpi"

0 commit comments

Comments
 (0)