diff --git a/azure-slurm-install/start-services.sh b/azure-slurm-install/start-services.sh
index fc432842..826767e8 100644
--- a/azure-slurm-install/start-services.sh
+++ b/azure-slurm-install/start-services.sh
@@ -1,14 +1,135 @@
 #!/usr/bin/env bash
 set -e
-if [ "$1" == "" ]; then
-    echo "Usage: $0 [scheduler|execute|login]"
-    exit 1
-fi
+run_slurmdbd_via_systemctl() {
 
-role=$1
-monitoring_enabled=$(/opt/cycle/jetpack/bin/jetpack config cyclecloud.monitoring.enabled False)
-OS=$(. /etc/os-release; echo $ID)
+    echo "Starting slurmdbd via systemctl..."
+    systemctl start slurmdbd
+
+    # Verify slurmdbd is responding
+    sleep 10
+    if ! sacctmgr ping > /dev/null 2>&1; then
+        echo "ERROR: slurmdbd started but is not responding to sacctmgr ping"
+        exit 2
+    fi
+    echo "slurmdbd is running and responding to ping"
+}
+
+run_slurmdbd() {
+
+    if [[ $(jetpack config slurm.is_primary_scheduler True) == "False" ]]; then
+        run_slurmdbd_via_systemctl
+        return
+    fi
+    # Get the slurm version from scontrol
+    slurm_version=$(scontrol --version | awk '{print $2}')
+    if [ -z "$slurm_version" ]; then
+        echo "Failed to get slurm version from scontrol --version"
+        return 1
+    fi
+    echo "Slurm version: $slurm_version"
+
+    # Define the expected startup message
+    startup_message="slurmdbd version ${slurm_version} started"
+    echo "Waiting for startup message: $startup_message"
+
+    # Create a temp file for slurmdbd output
+    log_file=$(mktemp)
+
+    # Start slurmdbd in foreground as user slurm, redirect output to log file
+    # use setsid to start slurmdbd in a new session.
+    setsid sudo -u slurm /usr/sbin/slurmdbd -D > "$log_file" 2>&1 &
+    slurmdbd_pid=$!
+
+    # Monitor the log file for the startup message.
+    # slurmdbd rollup can take a long time. We are considering a timeout of 1 hr.
+    timeout=3600
+    elapsed=0
+    started=false
+
+    while [ $elapsed -lt $timeout ]; do
+        if grep -q "$startup_message" "$log_file" 2>/dev/null; then
+            echo "Detected slurmdbd startup message"
+            started=true
+            break
+        fi
+        sleep 1
+        elapsed=$((elapsed + 1))
+    done
+
+
+    # Only kill the foreground process if started successfully
+    if [ "$started" == "true" ]; then
+        # Kill the foreground slurmdbd process
+        if [ -n "$slurmdbd_pid" ] && kill -0 $slurmdbd_pid 2>/dev/null; then
+            echo "Stopping foreground slurmdbd process (PID: $slurmdbd_pid)"
+            kill -INT $slurmdbd_pid
+            # Wait up to 60 seconds for graceful shutdown
+            wait_timeout=60
+            while [ $wait_timeout -gt 0 ] && kill -0 $slurmdbd_pid 2>/dev/null; do
+                sleep 1
+                wait_timeout=$((wait_timeout - 1))
+            done
+
+            # Force kill if still running
+            if kill -0 $slurmdbd_pid 2>/dev/null; then
+                echo "Process did not exit gracefully, sending SIGKILL"
+                kill -9 $slurmdbd_pid 2>/dev/null
+                sleep 1
+            fi
+            echo "Foreground slurmdbd process stopped"
+        fi
+
+        run_slurmdbd_via_systemctl
+    else
+        echo "slurmdbd startup is taking long, manual intervention is required"
+    fi
+
+    # clean up the log file
+    rm -f "$log_file"
+}
+
+run_slurmctld() {
+    echo "Starting Slurmctld"
+    systemctl start slurmctld
+    attempts=3
+    delay=5
+    set +e
+    for i in $( seq 1 $attempts ); do
+        echo $i/$attempts sleeping $delay seconds before running scontrol ping
+        sleep $delay
+        scontrol ping > /dev/null 2>&1;
+        if [ $? == 0 ]; then
+            systemctl start slurmctld || exit 1
+            break
+        fi;
+    done
+    if [ $i == $attempts ] && [ $? != 0 ]; then
+        echo FATAL: slurmctld failed to start! 1>&2
+        echo Here are the last 100 lines of slurmctld.log
+        tail -n 100 /var/log/slurmctld/slurmctld.log 1>&2
+        exit 2
+    fi
+}
+
+run_slurmrestd() {
+    if [[ "$OS" == "sle_hpc" ]]; then
+        echo Warning: slurmrestd is not supported on SUSE, skipping start. 1>&2
+        exit 0
+    fi
+    monitoring_enabled=$(/opt/cycle/jetpack/bin/jetpack config cyclecloud.monitoring.enabled False)
+    systemctl start slurmrestd
+    systemctl status slurmrestd --no-pager > /dev/null
+    if [ $? != 0 ]; then
+        echo Warning: slurmrestd failed to start! 1>&2
+        /opt/cycle/jetpack/bin/jetpack log "slurmrestd failed to start" --level=warn --priority=medium
+        exit 0
+    fi
+    # start slurm_exporter if monitoring is enabled and slurmrestd is running
+    if [[ "$monitoring_enabled" == "True" ]]; then
+        run_slurm_exporter
+    fi
+}
 
 reload_prom_config(){
     # Find the Prometheus process and send SIGHUP to reload config or log a warning if not found
@@ -24,64 +145,6 @@ reload_prom_config(){
         echo "Prometheus process not found, unable to reload configuration"
     fi
 }
-# all nodes need to have munge running
-echo restarting munge...
-systemctl restart munge
-# wait up to 60 seconds for munge to start
-iters=60
-while [ $iters -ge 0 ]; do
-    echo test | munge > /dev/null 2>&1
-    if [ $? == 0 ]; then
-        break
-    fi
-    sleep 1
-    iters=$(( $iters - 1 ))
-done
-
-# login nodes explicitly should _not_ have slurmd running.
-if [ $role == "login" ]; then
-    reload_prom_config
-    exit 0
-fi
-
-# execute nodes just need slurmd
-if [ $role == "execute" ]; then
-    systemctl start slurmd
-    reload_prom_config
-    exit 0
-fi
-
-# sanity check - make sure a valid role was actually passed in.
-# note they are defined in the slurm_*_role.rb
-if [ $role != "scheduler" ]; then
-    echo unknown role! $role 1>&2
-    exit 2
-fi
-
-# lastly - the scheduler
-
-systemctl show slurmdbd 2>&1 > /dev/null && systemctl start slurmdbd
-# there is no obvious way to check slurmdbd status _before_ starting slurmctld
-sleep 10
-systemctl start slurmctld
-attempts=3
-delay=5
-set +e
-for i in $( seq 1 $attempts ); do
-    echo $i/$attempts sleeping $delay seconds before running scontrol ping
-    sleep $delay
-    scontrol ping
-    if [ $? == 0 ]; then
-        systemctl start slurmctld || exit 1
-        break
-    fi;
-done
-if [ $i == $attempts ] && [ $? != 0 ]; then
-    echo FATAL: slurmctld failed to start! 1>&2
-    echo Here are the last 100 lines of slurmctld.log
-    tail -n 100 /var/log/slurmctld/slurmctld.log 1>&2
-    exit 2
-fi
 
 run_slurm_exporter() {
     # Run Slurm Exporter in a container
@@ -145,22 +208,55 @@ run_slurm_exporter() {
     fi
 }
-if [[ "$OS" == "sle_hpc" ]]; then
-    echo Warning: slurmrestd is not supported on SUSE, skipping start. 1>&2
-    exit 0
-fi
-
-# start slurmrestd
-sleep 10
-systemctl start slurmrestd
-systemctl status slurmrestd --no-pager > /dev/null
-if [ $? != 0 ]; then
-    echo Warning: slurmrestd failed to start! 1>&2
-    /opt/cycle/jetpack/bin/jetpack log "slurmrestd failed to start" --level=warn --priority=medium
-    exit 0
-fi
-# start slurm_exporter if monitoring is enabled and slurmrestd is running
-if [[ "$monitoring_enabled" == "True" ]]; then
-    run_slurm_exporter
-fi
-exit 0
+
+{
+    if [ "$1" == "" ]; then
+        echo "Usage: $0 [scheduler|execute|login]"
+        exit 1
+    fi
+
+    role=$1
+
+    OS=$(. /etc/os-release; echo $ID)
+    echo "Starting services"
+    # all nodes need to have munge running
+    echo restarting munge...
+    systemctl restart munge
+    # wait up to 60 seconds for munge to start
+    iters=60
+    while [ $iters -ge 0 ]; do
+        echo test | munge > /dev/null 2>&1
+        if [ $? == 0 ]; then
+            break
+        fi
+        sleep 1
+        iters=$(( $iters - 1 ))
+    done
+
+    # login nodes explicitly should _not_ have slurmd running.
+    if [ $role == "login" ]; then
+        reload_prom_config
+        exit 0
+    fi
+
+    # execute nodes just need slurmd
+    if [ $role == "execute" ]; then
+        systemctl start slurmd
+        reload_prom_config
+        exit 0
+    fi
+
+    # sanity check - make sure a valid role was actually passed in.
+    # note they are defined in the slurm_*_role.rb
+    if [ $role != "scheduler" ]; then
+        echo unknown role! $role 1>&2
+        exit 2
+    fi
+
+    # lastly - the scheduler
+    run_slurmdbd
+
+    run_slurmctld
+
+    run_slurmrestd
+} 2>&1 | tee -a /var/log/azure-slurm-install.log
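
For reference, a quick smoke test of the reworked flow. This is a sketch only: it assumes the script is run as root on a node that already has munge, Slurm, and jetpack installed, which is how it is invoked per role above.

    # scheduler node: munge, then run_slurmdbd, run_slurmctld, run_slurmrestd
    ./start-services.sh scheduler
    # execute node: munge plus slurmd; login node: munge only, no slurmd
    ./start-services.sh execute
    ./start-services.sh login
    # with this patch all output is also appended to the install log
    tail -f /var/log/azure-slurm-install.log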