Skip to content

Commit 4afaaad

Browse files
committed
revert changes to SitePackage.lua + don't rebuild OpenMPI
1 parent 172242b commit 4afaaad

File tree

2 files changed

+21
-15
lines changed

2 files changed

+21
-15
lines changed

create_lmodsitepackage.py

+21
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,31 @@
8484
end
8585
end
8686
87+
local function eessi_openmpi_load_hook(t)
    -- Disable the smcuda BTL when an OpenMPI module is loaded for aarch64/neoverse_v1,
    -- to work around a hang/crash due to a bug in OpenMPI;
    -- see https://gitlab.com/eessi/support/-/issues/41
    --
    -- t: table passed to the load hook; only t.modFullName is read here.
    --
    -- Open MPI only accepts the '^' (exclude) marker as the very first character
    -- of a component list, and rejects values with a caret anywhere else. So we
    -- can not blindly append ",^smcuda" to an existing value; instead:
    --   * unset or empty value      -> "^smcuda"
    --   * exclusive list ("^a,b")   -> make sure smcuda is in the excluded list
    --   * inclusive list ("a,b")    -> drop smcuda if listed; otherwise leave the
    --                                  value alone (smcuda is not selected anyway)
    local moduleName = string.match(t.modFullName, "(.-)/")
    local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or ""
    if (moduleName ~= "OpenMPI") or (cpuTarget ~= "aarch64/neoverse_v1") then
        return
    end

    local ompiMcaBtl = os.getenv("OMPI_MCA_btl")
    local newValue = nil

    if ompiMcaBtl == nil or ompiMcaBtl == "" then
        newValue = "^smcuda"
    else
        local exclusive = (string.sub(ompiMcaBtl, 1, 1) == "^")

        -- Split into component names; stray carets are dropped on purpose, which
        -- also repairs invalid values like "^openib,^smcuda".
        local smcudaListed = false
        local others = {}
        for name in string.gmatch(ompiMcaBtl, "[^,^%s]+") do
            if name == "smcuda" then
                smcudaListed = true
            else
                others[#others + 1] = name
            end
        end

        if exclusive then
            others[#others + 1] = "smcuda"
            newValue = "^" .. table.concat(others, ",")
        elseif smcudaListed then
            if #others > 0 then
                newValue = table.concat(others, ",")
            else
                -- smcuda was the only requested BTL: fall back to excluding it
                newValue = "^smcuda"
            end
        end
    end

    if newValue ~= nil then
        local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI"
        LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)")
        setenv("OMPI_MCA_btl", newValue)
    end
end
106+
87107
-- Combine both functions into a single one, as we can only register one function as load hook in lmod
88108
-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
89109
function eessi_load_hook(t)
90110
-- Single entry point that forwards the same argument t to each individual
-- load hook in turn; first the CUDA-enabled hook ...
eessi_cuda_enabled_load_hook(t)
111+
-- ... then the OpenMPI hook defined earlier in this file.
eessi_openmpi_load_hook(t)
91112
end
92113
93114

easystacks/pilot.nessi.no/2023.06/rebuilds/20240409-eb-4.9.0-OpenMPI-4.1.x-fix-smcuda.yml

-15
This file was deleted.

0 commit comments

Comments
 (0)