# Patch summary (preamble text ahead of the first "diff --git" header).
#
# image/docker-entrypoint.sh
#   After the "slurmctld is now active" message and before slurmd is exec'd,
#   the entrypoint now deletes the node named ${POD_NAME} via scontrol and
#   re-creates it in State=FUTURE. The attributes passed to "scontrol create"
#   are the first line of `slurmd -C` output with its first space-separated
#   field removed (`head -n1 | cut -d ' ' -f 2-`).
#   NOTE(review): the $(...) substitution is unquoted, presumably on purpose
#   so each attribute becomes its own argument; ${POD_NAME} is unquoted too
#   and is not checked for being set - confirm it is always exported.
#   NOTE(review): if the entrypoint runs under `set -e` (not visible in this
#   hunk), a failing `scontrol delete` would abort startup - confirm.
#
# slurm-cluster-chart/files/slurm.conf
#   Removes CPUs=4 from the NodeName=slurmd-[0-9] line and adds
#   TreeWidth=65533.
#   NOTE(review): presumably CPUs is dropped because the entrypoint now
#   supplies the node attributes, and TreeWidth is raised for dynamically
#   registered nodes - verify against the slurm.conf documentation.
#
# slurm-cluster-chart/templates/slurmd.yaml
#   Comment-only change: annotates hostPort 6818 as the mechanism that keeps
#   to a single pod per k8s node.
#
# slurm-cluster-chart/values.yaml
#   Changes the slurmImage tag from d3daba4 to 23aecda and rewords/adds the
#   NB comments on slurmd.name and slurmd.replicas.
#
# NOTE(review): in this copy the hunk headers and +/- markers sit inline on
# two long lines, so the original line structure looks collapsed; it would
# need restoring before the patch can be applied - confirm against the
# upstream commit.
diff --git a/image/docker-entrypoint.sh b/image/docker-entrypoint.sh index 5d65241..2f7c716 100755 --- a/image/docker-entrypoint.sh +++ b/image/docker-entrypoint.sh @@ -77,6 +77,10 @@ then done echo "-- slurmctld is now active ..." + echo "---> Updating node definitions ..." + scontrol delete node=${POD_NAME} + scontrol create NodeName=${POD_NAME} $(slurmd -C | head -n1 | cut -d ' ' -f 2-) State=FUTURE + echo "---> Starting the Slurm Node Daemon (slurmd) ..." exec /usr/sbin/slurmd -D "${@:2}" diff --git a/slurm-cluster-chart/files/slurm.conf b/slurm-cluster-chart/files/slurm.conf index a10c12b..a84dbe8 100644 --- a/slurm-cluster-chart/files/slurm.conf +++ b/slurm-cluster-chart/files/slurm.conf @@ -52,7 +52,8 @@ CommunicationParameters=NoAddrCache # NODES MaxNodeCount=10 -NodeName=slurmd-[0-9] State=FUTURE CPUs=4 +NodeName=slurmd-[0-9] State=FUTURE +TreeWidth=65533 # PARTITIONS PartitionName=all Default=yes Nodes=ALL diff --git a/slurm-cluster-chart/templates/slurmd.yaml b/slurm-cluster-chart/templates/slurmd.yaml index bec55ce..66638f2 100644 --- a/slurm-cluster-chart/templates/slurmd.yaml +++ b/slurm-cluster-chart/templates/slurmd.yaml @@ -36,7 +36,7 @@ spec: name: slurmd ports: - containerPort: 6818 - hostPort: 6818 + hostPort: 6818 # used to ensure only a single pod per k8s node resources: {} volumeMounts: - mountPath: /etc/slurm/ diff --git a/slurm-cluster-chart/values.yaml b/slurm-cluster-chart/values.yaml index 3d41248..2acf0dd 100644 --- a/slurm-cluster-chart/values.yaml +++ b/slurm-cluster-chart/values.yaml @@ -1,4 +1,4 @@ -slurmImage: ghcr.io/stackhpc/slurm-docker-cluster:d3daba4 +slurmImage: ghcr.io/stackhpc/slurm-docker-cluster:23aecda login: # Deployment resource name @@ -7,8 +7,8 @@ login: slurmd: # StatefulSet resource name - name: slurmd # NB this must match NodeName= in slurm-cluster-chart/files/slurm.conf - replicas: 2 + name: slurmd # NB must match prefix of NodeName= definition in slurm-cluster-chart/files/slurm.conf + replicas: 2 # NB must be <= 
number of nodes in NodeName= definition in slurm-cluster-chart/files/slurm.conf slurmctld: # StatefulSet resource name