use standard limit for A64FX: only use quarter of available cores

boegel · boegel · commit 59aeaf566aab · 2025-06-17T20:42:59.000+02:00
diff --git a/eb_hooks.py b/eb_hooks.py
@@ -143,6 +143,8 @@ def post_ready_hook(self, *args, **kwargs):
     # get CPU target
     cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
 
+    new_parallel = parallel
+
     # check if we have limits defined for this software
     if self.name in PARALLELISM_LIMITS:
         limits = PARALLELISM_LIMITS[self.name]
@@ -158,11 +160,16 @@ def post_ready_hook(self, *args, **kwargs):
         else:
             return  # no applicable limits found
 
-        # apply the limit if it's different from current
-        if new_parallel != parallel:
-            self.cfg[parallel_param] = new_parallel
-            msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
-            print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)
+    # check if there's a general limit set for CPU target
+    elif cpu_target in PARALLELISM_LIMITS:
+        operation_func, operation_args = PARALLELISM_LIMITS[cpu_target]
+        new_parallel = operation_func(parallel, operation_args)
+
+    # apply the limit if it's different from current
+    if new_parallel != parallel:
+        self.cfg[parallel_param] = new_parallel
+        msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
+        print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)
 
 
 def pre_prepare_hook(self, *args, **kwargs):
@@ -1376,27 +1383,22 @@ def set_maximum(parallel, max_value):
 # specific CPU target is defined in the data structure below. If not, it checks for
 # the generic '*' entry.
 PARALLELISM_LIMITS = {
+    # by default, only use quarter of cores when building for A64FX;
+    # this is done because total memory is typically limited on A64FX due to HBM,
+    # Deucalion has 32GB HBM for 48 cores per node
+    CPU_TARGET_A64FX: (divide_by_factor, 4),
+    # software-specific limits
     'libxc': {
         '*': (divide_by_factor, 2),
-        CPU_TARGET_A64FX: (set_maximum, 12),
-    },
-    'nodejs': {
-        CPU_TARGET_A64FX: (divide_by_factor, 2),
     },
     'MBX': {
         '*': (divide_by_factor, 2),
     },
-    'PyTorch': {
-        CPU_TARGET_A64FX: (divide_by_factor, 4),
-    },
     'TensorFlow': {
         '*': (divide_by_factor, 2),
         CPU_TARGET_A64FX: (set_maximum, 8),
     },
     'Qt5': {
         CPU_TARGET_A64FX: (set_maximum, 8),
     },
-    'ROOT': {
-        CPU_TARGET_A64FX: (divide_by_factor, 2),
-    },
 }