diff --git a/README.md b/README.md index 2b773d12..232300d7 100644 --- a/README.md +++ b/README.md @@ -227,7 +227,7 @@ Formatting is only available for jobs and not for partition and partition_hourly Do note: `azslurm cost` relies on slurm's admincomment feature to associate specific vm_size and meter info for jobs. ### Topology -`azslurm` in slurm 4.0 project upgrades `azslurm generate_topology` to `azslurm topology` to generate the [topology plugin configuration](https://slurm.schedmd.com/topology.html) for slurm either using VMSS topology, a fabric manager that has SHARP enabled, or the NVLink Domain. `azslurm topology` can generate both tree and block topology plugin configurations for Slurm. Users may use `azslurm topology` to generate the topology file but must manually add it to `/etc/slurm/topology.conf` either by giving that as the output file or copying the file over. Additionally, users must specify `topologyType=tree|block` in `slurm.conf` for full functionality. +`azslurm` in slurm 4.0 project upgrades `azslurm generate_topology` to `azslurm topology` to generate the [topology plugin configuration](https://slurm.schedmd.com/topology.html) for slurm either using VMSS topology, a fabric manager that has SHARP enabled, or the NVLink Domain. `azslurm topology` can generate both tree and block topology plugin configurations for Slurm. Users may use `azslurm topology` to generate the topology file but must manually add it to `/etc/slurm/topology.conf` either by giving that as the output file or copying the file over. Additionally, users must specify `topologyType=tree|block` in `slurm.conf` for full functionality. `azslurm` also includes `azslurm show_topology` to visualize block topology configurations in either table or JSON format. Note: `azslurm topology` is only useful in manually scaled clusters or clusters of fixed size. Autoscaling does not take topology into account and topology is not updated on autoscale. 
@@ -321,6 +321,112 @@ BlockSizes=5 ``` This either prints out the topology in slurm topology format or creates an output file with the topology. +`azslurm show_topology` can visualize block topology configurations in either JSON or table format. +``` +usage: azslurm show_topology [-h] [--config CONFIG] [--output OUTPUT] [--format {table,json}] + +options: + -h, --help show this help message and exit + --config CONFIG, -c CONFIG + --output OUTPUT Output file for the topology (default: /etc/slurm/topology.conf) + --format {table,json} + ``` + To view block topology configuration in table format: + ``` +azslurm show_topology --format table +Block Name | Size | Nodes +-----------+----------+--------------------------------------------------- +block3 | 8 | ccw-1-3-gpu-31, ccw-1-3-gpu-52, ccw-1-3-gpu-297... +block4 | 9 | ccw-1-3-gpu-5, ccw-1-3-gpu-17, ccw-1-3-gpu-254,... +block2 | 16 | ccw-1-3-gpu-464, ccw-1-3-gpu-7, ccw-1-3-gpu-454... +block1 | 18 | ccw-1-3-gpu-21, ccw-1-3-gpu-407, ccw-1-3-gpu-33... 
+ +Total blocks: 4 +Total nodes: 51 + ``` + To view block topology configuration in json format: + ``` + azslurm show_topology --format json +[ + { + "blockname": "block3", + "size": 8, + "nodelist": [ + "ccw-1-3-gpu-31", + "ccw-1-3-gpu-52", + "ccw-1-3-gpu-297", + "ccw-1-3-gpu-319", + "ccw-1-3-gpu-349", + "ccw-1-3-gpu-62", + "ccw-1-3-gpu-394", + "ccw-1-3-gpu-122" + ] + }, + { + "blockname": "block4", + "size": 9, + "nodelist": [ + "ccw-1-3-gpu-5", + "ccw-1-3-gpu-17", + "ccw-1-3-gpu-254", + "ccw-1-3-gpu-284", + "ccw-1-3-gpu-249", + "ccw-1-3-gpu-37", + "ccw-1-3-gpu-229", + "ccw-1-3-gpu-109", + "ccw-1-3-gpu-294" + ] + }, + { + "blockname": "block2", + "size": 16, + "nodelist": [ + "ccw-1-3-gpu-464", + "ccw-1-3-gpu-7", + "ccw-1-3-gpu-454", + "ccw-1-3-gpu-344", + "ccw-1-3-gpu-91", + "ccw-1-3-gpu-217", + "ccw-1-3-gpu-324", + "ccw-1-3-gpu-43", + "ccw-1-3-gpu-188", + "ccw-1-3-gpu-97", + "ccw-1-3-gpu-434", + "ccw-1-3-gpu-172", + "ccw-1-3-gpu-153", + "ccw-1-3-gpu-277", + "ccw-1-3-gpu-147", + "ccw-1-3-gpu-354" + ] + }, + { + "blockname": "block1", + "size": 18, + "nodelist": [ + "ccw-1-3-gpu-21", + "ccw-1-3-gpu-407", + "ccw-1-3-gpu-333", + "ccw-1-3-gpu-60", + "ccw-1-3-gpu-387", + "ccw-1-3-gpu-145", + "ccw-1-3-gpu-190", + "ccw-1-3-gpu-205", + "ccw-1-3-gpu-115", + "ccw-1-3-gpu-236", + "ccw-1-3-gpu-164", + "ccw-1-3-gpu-180", + "ccw-1-3-gpu-195", + "ccw-1-3-gpu-438", + "ccw-1-3-gpu-305", + "ccw-1-3-gpu-255", + "ccw-1-3-gpu-14", + "ccw-1-3-gpu-400" + ] + } +] +``` + + ### GB200/GB300 IMEX Support Cyclecloud Slurm clusters now include prolog and epilog scripts to enable and cleanup IMEX service on a per-job basis. The prolog script will attempt to kill an existing IMEX service before configuring a new instance that will be specific to the new, submitted job. The epilog script terminates the IMEX service. By default, these scripts will run for GB200/GB300 nodes and not run for non-GB200/GB300 nodes. 
def show_topology_parser(self, parser: ArgumentParser) -> None:
    """Register the command-line options accepted by ``show_topology``.

    Args:
        parser: The argument parser for the ``show_topology`` subcommand.
    """
    # NOTE(review): despite the flag name and help text, show_topology hands
    # this path to slutil.output_block_nodelist as the topology to display;
    # nothing in this command writes to it. Help text left unchanged because
    # the README usage block quotes it - confirm the intended wording.
    parser.add_argument(
        "--output",
        type=str,
        default="/etc/slurm/topology.conf",
        help="Output file for the topology (default: /etc/slurm/topology.conf)"
    )
    parser.add_argument(
        "--format",
        type=str,
        choices=["table", "json"],
        default="table",
        help="Output format: table or json (default: table)"
    )


def show_topology(self, config: Dict, output: Optional[str] = None, format: Optional[str] = None) -> None:
    """
    Show the topology configuration for block topology
    """
    # The signature allows output=None (e.g. a programmatic caller that skips
    # the argument parser). Fall back to the same default the parser uses
    # rather than passing None on to output_block_nodelist.
    topology_source = output if output is not None else "/etc/slurm/topology.conf"

    # Only an explicit "json" selects JSON; anything else, including None,
    # is rendered as a table.
    render_table = format != "json"
    print(slutil.output_block_nodelist(topology_source, table=render_table))
# Matches one block definition line, e.g. "BlockName=block1 Nodes=n1,n2".
_BLOCK_LINE_RE = re.compile(r'BlockName=([\w-]+)\s+Nodes=(.+)')


def _load_topology_text(topology_input):
    """Return the file's text if topology_input is an existing path, else the input itself."""
    try:
        is_existing_path = Path(topology_input).exists()
    except (OSError, ValueError):
        # Inline topology content is routinely longer than the OS allows for a
        # file name, in which case the existence probe itself can fail (e.g.
        # "File name too long"). Such input cannot be a path, so treat it as
        # content instead of crashing.
        is_existing_path = False

    if not is_existing_path:
        return topology_input

    with open(topology_input, 'r', encoding='utf-8') as f:
        return f.read()


def _parse_blocks(content):
    """Build one {'blockname', 'size', 'nodelist'} dict per BlockName line in content."""
    blocks = []
    for raw_line in content.splitlines():
        # match() anchors at the start of the stripped line, so comments,
        # blank lines and other directives (e.g. BlockSizes=) are skipped.
        match = _BLOCK_LINE_RE.match(raw_line.strip())
        if not match:
            continue

        stripped_names = (name.strip() for name in match.group(2).split(','))
        # Drop empty entries so a trailing or doubled comma does not inflate the size.
        nodelist = [name for name in stripped_names if name]

        blocks.append({
            'blockname': match.group(1),
            'size': len(nodelist),
            'nodelist': nodelist
        })
    return blocks


def _format_block_table(blocks):
    """Render blocks as a text table followed by block/node totals."""
    name_width = max(len("Block Name"), max(len(b['blockname']) for b in blocks))

    rows = [
        f"{'Block Name':<{name_width}} | {'Size':>6} | {'Nodes'}",
        f"{'-' * name_width}-+-{'-' * 8}-+-{'-' * 50}",
    ]
    for block in blocks:
        nodes_str = ', '.join(block['nodelist'])
        # Keep the Nodes column at 50 characters: 47 of content plus an ellipsis.
        if len(nodes_str) > 50:
            nodes_str = nodes_str[:47] + '...'
        rows.append(f"{block['blockname']:<{name_width}} | {block['size']:>6} | {nodes_str}")

    rows.append(f"\nTotal blocks: {len(blocks)}")
    rows.append(f"Total nodes: {sum(b['size'] for b in blocks)}")

    return "\n".join(rows) + "\n"


def output_block_nodelist(topology_input, table=False):
    """
    Parse a block topology file or string and output node information.

    Node names are split on commas only; hostlist range expressions such as
    ``node[1-4]`` are not expanded and would be counted as written.

    Args:
        topology_input: Either a path to an existing topology file (str) or the
            topology content itself (str). Input that does not name an existing
            path is treated as content.
        table: If True, output in tabular format; if False, output as JSON

    Returns:
        If table=False: JSON string of blocks sorted by size (smallest first)
        If table=True: string in tabular format with block names, sizes, and
        (truncated) node lists, followed by block and node totals

    Raises:
        ValueError: If the input contains no ``BlockName=`` text, or if no
            ``BlockName=<name> Nodes=<list>`` line could be parsed from it.
    """
    content = _load_topology_text(topology_input)

    # Check if it's a block topology
    if 'BlockName=' not in content:
        raise ValueError("Input is not a block topology format")

    blocks = _parse_blocks(content)
    if not blocks:
        raise ValueError("No blocks found in topology")

    # Sort blocks by size (smallest to largest); the sort is stable, so blocks
    # of equal size keep their order of appearance.
    blocks.sort(key=lambda block: block['size'])

    if table:
        return _format_block_table(blocks)
    return json.dumps(blocks, indent=2)