Skip to content

Use a standard limit for A64FX: only use a quarter of the available cores #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ def post_ready_hook(self, *args, **kwargs):
# get CPU target
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')

new_parallel = parallel

# check if we have limits defined for this software
if self.name in PARALLELISM_LIMITS:
limits = PARALLELISM_LIMITS[self.name]
Expand All @@ -158,11 +160,16 @@ def post_ready_hook(self, *args, **kwargs):
else:
return # no applicable limits found

# apply the limit if it's different from current
if new_parallel != parallel:
self.cfg[parallel_param] = new_parallel
msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)
# check if there's a general limit set for CPU target
elif cpu_target in PARALLELISM_LIMITS:
operation_func, operation_args = PARALLELISM_LIMITS[cpu_target]
new_parallel = operation_func(parallel, operation_args)

# apply the limit if it's different from current
if new_parallel != parallel:
self.cfg[parallel_param] = new_parallel
msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)


def pre_prepare_hook(self, *args, **kwargs):
Expand Down Expand Up @@ -1376,27 +1383,22 @@ def set_maximum(parallel, max_value):
# specific CPU target is defined in the data structure below. If not, it checks for
# the generic '*' entry.
PARALLELISM_LIMITS = {
# Structure of this table:
#   - a top-level key is either a CPU target (general limit for that target)
#     or a software name (limits that apply only to that software);
#   - every limit is a (function, argument) tuple; the hook applies it as
#     function(current_parallel, argument) to obtain the new parallelism;
#   - a software entry maps a CPU target, or '*' for any CPU target,
#     to such a tuple.
# The general CPU target entry is only consulted for software that has no entry
# of its own in this table.
# NOTE(review): divide_by_factor/set_maximum are defined elsewhere in this file;
# presumably they divide the core count by the factor and cap it at the given
# maximum, respectively - confirm against their definitions.
#
# By default, only use a quarter of the cores when building for A64FX:
# total memory is typically limited on A64FX because it relies on HBM
# (for example, Deucalion has 32GB HBM for 48 cores per node).
CPU_TARGET_A64FX: (divide_by_factor, 4),
# software-specific limits
'libxc': {
'*': (divide_by_factor, 2),
CPU_TARGET_A64FX: (set_maximum, 12),
},
'nodejs': {
CPU_TARGET_A64FX: (divide_by_factor, 2),
},
'MBX': {
'*': (divide_by_factor, 2),
},
'PyTorch': {
# NOTE(review): same operation as the general A64FX default above -
# confirm whether this software-specific entry is still needed
CPU_TARGET_A64FX: (divide_by_factor, 4),
},
'TensorFlow': {
'*': (divide_by_factor, 2),
CPU_TARGET_A64FX: (set_maximum, 8),
},
'Qt5': {
CPU_TARGET_A64FX: (set_maximum, 8),
},
'ROOT': {
CPU_TARGET_A64FX: (divide_by_factor, 2),
},
}
Loading