From 779f6540cd808de9125c5c562cbbc6f67ca3efc5 Mon Sep 17 00:00:00 2001 From: Landon Owen Date: Mon, 10 Mar 2025 11:23:01 -0700 Subject: [PATCH 1/5] Added LC flux machine names, added call to include the flux python library to the system path for fluxScheduled, removed rztopaz machine name --- ats/atsMachines/fluxScheduled.py | 10 +++++++--- ats/configuration.py | 21 ++++++++++----------- ats/util/generic_utils.py | 6 +++++- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py index acd2d90..874d2fa 100755 --- a/ats/atsMachines/fluxScheduled.py +++ b/ats/atsMachines/fluxScheduled.py @@ -10,15 +10,14 @@ """ import os +import sys import time from math import ceil -import flux -import flux.job - from ats import terminal from ats.atsMachines import lcMachines from ats.tests import AtsTest +from ats.utils.generic_utils import runCommand from ats import configuration from ats import log @@ -40,6 +39,11 @@ def init(self): Sets ceiling on number of nodes and cores in the allocation. Defines a persistent handle to use to connect to the broker. """ + # Add the flux python library to the system path + flux_python_path = runCommand("flux config builtin python_path") + sys.path.append(flux_python_path) + import flux + import flux.job self.fluxHandle = flux.Flux() self.numNodes = int( flux.resource.list.resource_list(self.fluxHandle).get().up.nnodes diff --git a/ats/configuration.py b/ats/configuration.py index 653bac4..8946d77 100644 --- a/ats/configuration.py +++ b/ats/configuration.py @@ -454,7 +454,7 @@ def get_machine_factory(module_name, machine_class, module_name. "machine_package" tells Python where module_name can be found if not in the project's root directory. """ - log("Machine Factory: importing {} from {}".format(module_name, machine_package), + log(f"Machine Factory: importing {module_name} from {machine_package}", echo=False) try: machine_module = importlib.import_module(f'.{module_name}', @@ -462,10 +462,14 @@ def get_machine_factory(module_name, machine_class, machine_factory = getattr(machine_module, machine_class) return machine_factory - except ModuleNotFoundError: - log(f"Module '{module_name}' not found in package '{machine_package}'. Continuing search.", - echo=False) - return None + except ModuleNotFoundError as e: + if (e == ModuleNotFoundError(module_name)): + log(f"Module {module_name} not found in package {machine_package}. Continuing search.", + echo=False) + return None + else: + # If a module error occurs for a module other than module_name, raise an error + raise e def get_machine(file_text, file_name, is_batch=False): header = '#BATS:' if is_batch else '#ATS:' @@ -524,12 +528,7 @@ def get_machine_entry_points(machine_class): echo=False) for name, machine_factory in ats_machines.items(): if machine_class in machine_factory.value: - log("Machine Factory: Found machine {} of class {}: {}".format( - name, - machine_class, - machine_factory - )) - + log(f"Machine Factory: Found machine {name} of class {machine_class}: {machine_factory}") return machine_factory.load()(machine_class, -1) # Downstream needs to be able to detect if machine isn't found diff --git a/ats/util/generic_utils.py b/ats/util/generic_utils.py index d57ec4d..71aaed8 100644 --- a/ats/util/generic_utils.py +++ b/ats/util/generic_utils.py @@ -358,7 +358,7 @@ def set_machine_type_based_on_sys_type(): elif host.startswith('herd'): os.environ['MACHINE_TYPE'] = 'slurm32' - elif host.startswith('rzgenie') or host.startswith('rztopaz') or host.startswith('rztrona') or \ + elif host.startswith('rzgenie') or host.startswith('rztrona') or \ host.startswith('borax') or host.startswith('quartz') or host.startswith('agate') or \ host.startswith('pascal') or host.startswith('jade') or host.startswith('mica'): os.environ['MACHINE_TYPE'] = 'slurm36' @@ -372,6 +372,10 @@ def set_machine_type_based_on_sys_type(): elif host.startswith('mammoth'): os.environ['MACHINE_TYPE'] = 'slurm128' + elif host.startswith('tioga') or host.startswith('rzadams') or \ + host.startswith('rzvernal') or host.startswith('tuolumne'): + os.environ['MACHINE_TYPE'] = 'flux00' + elif os.environ['SYS_TYPE'] in ['bgqos_0']: os.environ['MACHINE_TYPE'] = 'bgqos_0_ASQ' From f254e75898d8bb637802ba1d98a0e70b4a180458 Mon Sep 17 00:00:00 2001 From: Landon Owen Date: Mon, 10 Mar 2025 11:31:54 -0700 Subject: [PATCH 2/5] Added corona to the flux machines --- ats/util/generic_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ats/util/generic_utils.py b/ats/util/generic_utils.py index 71aaed8..b461c7b 100644 --- a/ats/util/generic_utils.py +++ b/ats/util/generic_utils.py @@ -373,7 +373,8 @@ def set_machine_type_based_on_sys_type(): os.environ['MACHINE_TYPE'] = 'slurm128' elif host.startswith('tioga') or host.startswith('rzadams') or \ - host.startswith('rzvernal') or host.startswith('tuolumne'): + host.startswith('rzvernal') or host.startswith('tuolumne') or \ + host.startswith('corona'): os.environ['MACHINE_TYPE'] = 'flux00' elif os.environ['SYS_TYPE'] in ['bgqos_0']: From 68bcf0967c57dde7081921d3d79089a36cb98576 Mon Sep 17 00:00:00 2001 From: Landon Owen Date: Mon, 10 Mar 2025 13:48:29 -0700 Subject: [PATCH 3/5] Fix typo for ats util --- ats/atsMachines/fluxScheduled.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py index 874d2fa..66b7cf4 100755 --- a/ats/atsMachines/fluxScheduled.py +++ b/ats/atsMachines/fluxScheduled.py @@ -17,7 +17,7 @@ from ats import terminal from ats.atsMachines import lcMachines from ats.tests import AtsTest -from ats.utils.generic_utils import runCommand +from ats.util.generic_utils import runCommand from ats import configuration from ats import log From a7dd0d9f98fbcf5e78ba7560fc4c74a530029630 Mon Sep 17 00:00:00 2001 From: Landon Owen Date: Mon, 10 Mar 2025 14:00:41 -0700 Subject: [PATCH 4/5] Fix bug with flux python path string --- ats/atsMachines/fluxScheduled.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py index 66b7cf4..3b5790f 100755 --- a/ats/atsMachines/fluxScheduled.py +++ b/ats/atsMachines/fluxScheduled.py @@ -41,7 +41,7 @@ def init(self): """ # Add the flux python library to the system path flux_python_path = runCommand("flux config builtin python_path") - sys.path.append(flux_python_path) + sys.path.append(flux_python_path[0].strip()) import flux import flux.job self.fluxHandle = flux.Flux() @@ -516,7 +516,7 @@ def remainingCapacity(self): return 0 elif self.numProcsAvailable < 1: return 0 - elif self.numGPUsAvailable < 1 and self.numGPUs is not 0: + elif self.numGPUsAvailable < 1 and self.numGPUs ~= 0: return 0 else: return self.numProcsAvailable From f788b9e28dba72d27b63542ba06f97b3d07b0534 Mon Sep 17 00:00:00 2001 From: Landon Owen Date: Mon, 10 Mar 2025 14:03:12 -0700 Subject: [PATCH 5/5] Change ~= to != --- ats/atsMachines/fluxScheduled.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py index 3b5790f..7d5d336 100755 --- a/ats/atsMachines/fluxScheduled.py +++ b/ats/atsMachines/fluxScheduled.py @@ -516,7 +516,7 @@ def remainingCapacity(self): return 0 elif self.numProcsAvailable < 1: return 0 - elif self.numGPUsAvailable < 1 and self.numGPUs ~= 0: + elif self.numGPUsAvailable < 1 and self.numGPUs != 0: return 0 else: return self.numProcsAvailable