From 3fe23f34b983823026b86deb7e2f60830788cc5c Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Tue, 29 Apr 2025 16:15:52 +0200
Subject: [PATCH 1/9] install slurm to debian

---
 slurm-docker-cluster/Dockerfile.debian | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 slurm-docker-cluster/Dockerfile.debian

diff --git a/slurm-docker-cluster/Dockerfile.debian b/slurm-docker-cluster/Dockerfile.debian
new file mode 100644
index 0000000..2141361
--- /dev/null
+++ b/slurm-docker-cluster/Dockerfile.debian
@@ -0,0 +1,20 @@
+FROM debian:bookworm
+
+ENV LC_ALL=en_US.utf8
+ENV LANG=en_US.utf8
+
+ARG SLURM_VERSION=24.05.4
+ARG GOSU_VERSION=1.17
+ARG UV_VERSION=0.6.4
+
+COPY --from=ghcr.io/astral-sh/uv:$UV_VERSION /uv /bin/uv
+
+RUN set -xe \
+    && apt-get update \
+    && apt-get install -y curl gnupg
+
+RUN bash -c "set -xe && for package in 'smd' 'smd-client' 'smd-slurmd' 'smd-slurmctld' 'smd-slurmdbd' 'smd-sview'; do \
+    curl -Lo \"/tmp/slurm-\${package}.deb\" https://github.com/scalableminds/slurm-packages/releases/download/${SLURM_VERSION}/slurm-\${package}_${SLURM_VERSION}-1_amd64.deb; \
+done"
+RUN bash -c "apt-get install --yes -f /tmp/*.deb"
+RUN bash -c "rm /tmp/*"

From 1c5a67eff510ad62cd50d70cf64fb51dce06f578 Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 10:24:31 +0200
Subject: [PATCH 2/9] port Dockerfile to debian

---
 slurm-docker-cluster/Dockerfile           | 98 +++++------------------
 slurm-docker-cluster/Dockerfile.debian    | 20 -----
 slurm-docker-cluster/docker-entrypoint.sh | 12 +--
 3 files changed, 25 insertions(+), 105 deletions(-)
 delete mode 100644 slurm-docker-cluster/Dockerfile.debian

diff --git a/slurm-docker-cluster/Dockerfile b/slurm-docker-cluster/Dockerfile
index f5dc3fd..dfb553a 100644
--- a/slurm-docker-cluster/Dockerfile
+++ b/slurm-docker-cluster/Dockerfile
@@ -1,82 +1,26 @@
-FROM rockylinux:9.2
+FROM debian:bookworm
 
-ENV LC_ALL=en_US.utf8
-ENV LANG=en_US.utf8
+ARG SLURM_VERSION="24.05.4"
+ARG UV_VERSION="0.6.4"
+ARG SLURM_USER_UID=990
+ARG SLURM_USER_GID=990
 
-LABEL org.opencontainers.image.source="https://github.com/giovtorres/slurm-docker-cluster" \
-      org.opencontainers.image.title="slurm-docker-cluster" \
-      org.opencontainers.image.description="Slurm Docker cluster on CentOS 7" \
-      org.label-schema.docker.cmd="docker-compose up -d" \
-      maintainer="Giovanni Torres"
+COPY --from=ghcr.io/astral-sh/uv:$UV_VERSION /uv /bin/uv
 
-ARG SLURM_TAG=slurm-22-05-9-1
-ARG GOSU_VERSION=1.11
+RUN set -xe \
+    && apt-get update \
+    && apt-get install -y curl gnupg
 
-COPY --from=ghcr.io/astral-sh/uv:0.4.20 /uv /bin/uv
+RUN bash -c "set -xe && for package in 'smd' 'smd-client' 'smd-slurmd' 'smd-slurmctld' 'smd-slurmdbd' 'smd-sview'; do \
+    curl -Lo \"/tmp/slurm-\${package}.deb\" https://github.com/scalableminds/slurm-packages/releases/download/${SLURM_VERSION}/slurm-\${package}_${SLURM_VERSION}-1_amd64.deb; \
+done"
+RUN bash -c "apt-get install --yes -f /tmp/*.deb"
+RUN bash -c "rm /tmp/*"
 
-RUN set -ex \
-    && dnf makecache \
-    && dnf -y update \
-    && dnf -y install dnf-plugins-core https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \
-    && dnf config-manager --enable epel \
-    && dnf config-manager --enable crb \
-    && dnf -y install \
-        wget \
-        bzip2 \
-        perl \
-        gcc \
-        gcc-c++\
-        git \
-        glibc-langpack-en \
-        gnupg \
-        make \
-        munge \
-        munge-devel \
-        python39 \
-        python3-devel \
-        python3-pip \
-        mariadb-server \
-        mariadb-devel \
-        psmisc \
-        slurm-contribs \
-        bash-completion \
-        vim-enhanced \
-    && dnf clean all \
-    && rm -rf /var/cache/yum \
-    && ln -s /usr/bin/python3 /usr/bin/python
-
-
-
-RUN set -ex \
-    && wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" \
-    && wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc" \
-    && export GNUPGHOME="$(mktemp -d)" \
-    # See https://github.com/tianon/gosu/issues/17#issuecomment-348464529
-    && ( gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
-        || gpg --keyserver pgp.mit.edu --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
-        || gpg --keyserver keyserver.pgp.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
-        || gpg --keyserver keyserver.ubuntu.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
-    ) \
-    && gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
-    && rm -rf "${GNUPGHOME}" /usr/local/bin/gosu.asc \
-    && chmod +x /usr/local/bin/gosu \
-    && gosu nobody true
-
-RUN set -x \
-    && git clone --depth 1 --branch $SLURM_TAG https://github.com/SchedMD/slurm.git \
-    && pushd slurm \
-    && ./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm \
-        --with-mysql_config=/usr/bin --libdir=/usr/lib64 \
-    && make install \
-    && install -D -m644 etc/cgroup.conf.example /etc/slurm/cgroup.conf.example \
-    && install -D -m644 etc/slurm.conf.example /etc/slurm/slurm.conf.example \
-    && install -D -m644 etc/slurmdbd.conf.example /etc/slurm/slurmdbd.conf.example \
-    && install -D -m644 contribs/slurm_completion_help/slurm_completion.sh /etc/profile.d/slurm_completion.sh \
-    && popd \
-    && rm -rf slurm \
-    && groupadd -r --gid=1001 slurm \
-    && useradd -r -g slurm --uid=1001 slurm \
-    && mkdir /etc/sysconfig/slurm \
+RUN set -xe \
+    && addgroup --gid="$SLURM_USER_GID" slurm \
+    && adduser --system --uid="$SLURM_USER_UID" --ingroup slurm slurm \
+    && mkdir -p /etc/sysconfig/slurm \
         /var/spool/slurmd \
         /var/run/slurmd \
         /var/run/slurmdbd \
@@ -92,17 +36,13 @@ RUN set -x \
         /var/lib/slurmd/assoc_usage \
         /var/lib/slurmd/qos_usage \
         /var/lib/slurmd/fed_mgr_state \
-    && chown -R slurm:slurm /var/*/slurm* \
-    && /sbin/create-munge-key
+    && chown -R slurm:slurm /var/*/slurm*
 
 COPY --chown=slurm:slurm slurm.conf /etc/slurm/slurm.conf
 COPY --chown=slurm:slurm slurmdbd.conf /etc/slurm/slurmdbd.conf
-
 RUN chmod 600 /etc/slurm/slurm.conf
 RUN chmod 600 /etc/slurm/slurmdbd.conf
 
 COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
-
 ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
-
 CMD ["slurmdbd"]
diff --git a/slurm-docker-cluster/Dockerfile.debian b/slurm-docker-cluster/Dockerfile.debian
deleted file mode 100644
index 2141361..0000000
--- a/slurm-docker-cluster/Dockerfile.debian
+++ /dev/null
@@ -1,20 +0,0 @@
-FROM debian:bookworm
-
-ENV LC_ALL=en_US.utf8
-ENV LANG=en_US.utf8
-
-ARG SLURM_VERSION=24.05.4
-ARG GOSU_VERSION=1.17
-ARG UV_VERSION=0.6.4
-
-COPY --from=ghcr.io/astral-sh/uv:$UV_VERSION /uv /bin/uv
-
-RUN set -xe \
-    && apt-get update \
-    && apt-get install -y curl gnupg
-
-RUN bash -c "set -xe && for package in 'smd' 'smd-client' 'smd-slurmd' 'smd-slurmctld' 'smd-slurmdbd' 'smd-sview'; do \
-    curl -Lo \"/tmp/slurm-\${package}.deb\" https://github.com/scalableminds/slurm-packages/releases/download/${SLURM_VERSION}/slurm-\${package}_${SLURM_VERSION}-1_amd64.deb; \
-done"
-RUN bash -c "apt-get install --yes -f /tmp/*.deb"
-RUN bash -c "rm /tmp/*"
diff --git a/slurm-docker-cluster/docker-entrypoint.sh b/slurm-docker-cluster/docker-entrypoint.sh
index 9a1203a..3366020 100755
--- a/slurm-docker-cluster/docker-entrypoint.sh
+++ b/slurm-docker-cluster/docker-entrypoint.sh
@@ -4,13 +4,13 @@ set -e
 if [ "$1" = "slurmdbd" ]
 then
     echo "---> Starting the MUNGE Authentication service (munged) ..."
-    gosu munge /usr/sbin/munged
+    setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged
 
     echo "---> Starting the Slurm Database Daemon (slurmdbd) ..."
 
     {
         . /etc/slurm/slurmdbd.conf
-        until echo "SELECT 1" | mysql -h $StorageHost -u$StorageUser -p$StoragePass 2>&1 > /dev/null
+        until echo "SELECT 1" | mysql -h "$StorageHost" -u"$StorageUser" -p"$StoragePass" 2>&1 > /dev/null
         do
             echo "-- Waiting for database to become active ..."
             sleep 2
@@ -18,13 +18,13 @@ then
     }
     echo "-- Database is now active ..."
 
-    exec gosu slurm /usr/sbin/slurmdbd -Dvvv
+    exec setpriv --reuid=slurm --regid=munge --init-groups /usr/sbin/slurmdbd -Dvvv
 fi
 
 if [ "$1" = "slurmctld" ]
 then
     echo "---> Starting the MUNGE Authentication service (munged) ..."
-    gosu munge /usr/sbin/munged
+    setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged
 
     echo "---> Waiting for slurmdbd to become active before starting slurmctld ..."
 
@@ -36,13 +36,13 @@ then
     echo "-- slurmdbd is now active ..."
 
     echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
-    exec gosu slurm /usr/sbin/slurmctld -Dvvv
+    exec setpriv --reuid=slurm --regid=slurm /usr/sbin/slurmctld -Dvvv
 fi
 
 if [ "$1" = "slurmd" ]
 then
     echo "---> Starting the MUNGE Authentication service (munged) ..."
-    gosu munge /usr/sbin/munged
+    setpriv --reuid=munge --regid=munge /usr/sbin/munged
 
     echo "---> Waiting for slurmctld to become active before starting slurmd..."
 

From e596155088c82397e6f60fb75ff5d66039ee2e36 Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 11:32:09 +0200
Subject: [PATCH 3/9] add mysql

---
 slurm-docker-cluster/Dockerfile           | 17 +++++++++++++----
 slurm-docker-cluster/docker-entrypoint.sh |  2 +-
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/slurm-docker-cluster/Dockerfile b/slurm-docker-cluster/Dockerfile
index dfb553a..837eda0 100644
--- a/slurm-docker-cluster/Dockerfile
+++ b/slurm-docker-cluster/Dockerfile
@@ -4,18 +4,25 @@ ARG SLURM_VERSION="24.05.4"
 ARG UV_VERSION="0.6.4"
 ARG SLURM_USER_UID=990
 ARG SLURM_USER_GID=990
+ARG MYSQL_CONFIG_VERSION="0.8.34"
 
 COPY --from=ghcr.io/astral-sh/uv:$UV_VERSION /uv /bin/uv
 
 RUN set -xe \
     && apt-get update \
-    && apt-get install -y curl gnupg
+    && apt-get install -y curl gnupg lsb-release wget
+RUN curl -Lo /tmp/mysql-apt-config.deb "https://dev.mysql.com/get/mysql-apt-config_$MYSQL_CONFIG_VERSION-1_all.deb"
+RUN set -xe \
+    && dpkg -i /tmp/mysql-apt-config.deb \
+    && env DEBIAN_FRONTEND="noninteractive" dpkg-reconfigure mysql-apt-config \
+    && apt-get update \
+    && apt-get install --yes mysql-server
 
 RUN bash -c "set -xe && for package in 'smd' 'smd-client' 'smd-slurmd' 'smd-slurmctld' 'smd-slurmdbd' 'smd-sview'; do \
     curl -Lo \"/tmp/slurm-\${package}.deb\" https://github.com/scalableminds/slurm-packages/releases/download/${SLURM_VERSION}/slurm-\${package}_${SLURM_VERSION}-1_amd64.deb; \
 done"
-RUN bash -c "apt-get install --yes -f /tmp/*.deb"
-RUN bash -c "rm /tmp/*"
+RUN apt-get install --yes -f /tmp/*.deb
+RUN rm /tmp/*
 
 RUN set -xe \
     && addgroup --gid="$SLURM_USER_GID" slurm \
@@ -27,6 +34,7 @@ RUN set -xe \
         /var/lib/slurmd \
         /var/log/slurm \
         /data \
+        /run/munge \
     && touch /var/lib/slurmd/node_state \
         /var/lib/slurmd/front_end_state \
         /var/lib/slurmd/job_state \
@@ -36,7 +44,8 @@ RUN set -xe \
         /var/lib/slurmd/assoc_usage \
         /var/lib/slurmd/qos_usage \
         /var/lib/slurmd/fed_mgr_state \
-    && chown -R slurm:slurm /var/*/slurm*
+    && chown -R slurm:slurm /var/*/slurm* \
+    && chown -R munge:munge /run/munge
 
 COPY --chown=slurm:slurm slurm.conf /etc/slurm/slurm.conf
 COPY --chown=slurm:slurm slurmdbd.conf /etc/slurm/slurmdbd.conf
diff --git a/slurm-docker-cluster/docker-entrypoint.sh b/slurm-docker-cluster/docker-entrypoint.sh
index 3366020..20aba45 100755
--- a/slurm-docker-cluster/docker-entrypoint.sh
+++ b/slurm-docker-cluster/docker-entrypoint.sh
@@ -42,7 +42,7 @@ fi
 if [ "$1" = "slurmd" ]
 then
     echo "---> Starting the MUNGE Authentication service (munged) ..."
-    setpriv --reuid=munge --regid=munge /usr/sbin/munged
+    setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged
 
     echo "---> Waiting for slurmctld to become active before starting slurmd..."
 

From 22d062490a53be9c8cf835243185be102eeaea70 Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 11:39:34 +0200
Subject: [PATCH 4/9] pull uv image first

---
 slurm-docker-cluster/Dockerfile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/slurm-docker-cluster/Dockerfile b/slurm-docker-cluster/Dockerfile
index 837eda0..95d0618 100644
--- a/slurm-docker-cluster/Dockerfile
+++ b/slurm-docker-cluster/Dockerfile
@@ -1,12 +1,15 @@
+ARG UV_VERSION="0.7.0"
+
+FROM ghcr.io/astral-sh/uv:$UV_VERSION AS uv
+
 FROM debian:bookworm
 
 ARG SLURM_VERSION="24.05.4"
-ARG UV_VERSION="0.6.4"
 ARG SLURM_USER_UID=990
 ARG SLURM_USER_GID=990
 ARG MYSQL_CONFIG_VERSION="0.8.34"
 
-COPY --from=ghcr.io/astral-sh/uv:$UV_VERSION /uv /bin/uv
+COPY --from=uv /uv /bin/uv
 
 RUN set -xe \
     && apt-get update \

From c5ed69982da1fee437f6dc32561667c097e61833 Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 13:25:58 +0200
Subject: [PATCH 5/9] fix group id

---
 slurm-docker-cluster/docker-entrypoint.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/slurm-docker-cluster/docker-entrypoint.sh b/slurm-docker-cluster/docker-entrypoint.sh
index 20aba45..52c2c4d 100755
--- a/slurm-docker-cluster/docker-entrypoint.sh
+++ b/slurm-docker-cluster/docker-entrypoint.sh
@@ -18,7 +18,7 @@ then
     }
     echo "-- Database is now active ..."
 
-    exec setpriv --reuid=slurm --regid=munge --init-groups /usr/sbin/slurmdbd -Dvvv
+    exec setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmdbd -Dvvv
 fi
 
 if [ "$1" = "slurmctld" ]

From 5cdfca51ba320be154e140a902e48862f8a5b7ee Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 13:33:27 +0200
Subject: [PATCH 6/9] add missing argument

---
 slurm-docker-cluster/docker-entrypoint.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/slurm-docker-cluster/docker-entrypoint.sh b/slurm-docker-cluster/docker-entrypoint.sh
index 52c2c4d..1b1cae7 100755
--- a/slurm-docker-cluster/docker-entrypoint.sh
+++ b/slurm-docker-cluster/docker-entrypoint.sh
@@ -36,7 +36,7 @@ then
     echo "-- slurmdbd is now active ..."
 
     echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
-    exec setpriv --reuid=slurm --regid=slurm /usr/sbin/slurmctld -Dvvv
+    exec setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmctld -Dvvv
 fi
 
 if [ "$1" = "slurmd" ]

From 5749a33e5bc451b3dc5818c1b91266f165cbd3eb Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 15:11:20 +0200
Subject: [PATCH 7/9] restart slurmctld

---
 slurm-docker-cluster/docker-entrypoint.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/slurm-docker-cluster/docker-entrypoint.sh b/slurm-docker-cluster/docker-entrypoint.sh
index 1b1cae7..f04e2cb 100755
--- a/slurm-docker-cluster/docker-entrypoint.sh
+++ b/slurm-docker-cluster/docker-entrypoint.sh
@@ -36,7 +36,10 @@ then
     echo "-- slurmdbd is now active ..."
 
     echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
-    exec setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmctld -Dvvv
+    while true; do
+        setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmctld -Dvvv
+        echo "slurmctl: exited with $?"
+    done
 fi
 
 if [ "$1" = "slurmd" ]

From 6adad9072551f830cb90036b7a40f48060c3736d Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 15:22:04 +0200
Subject: [PATCH 8/9] do not create slurm job state

---
 slurm-docker-cluster/Dockerfile | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/slurm-docker-cluster/Dockerfile b/slurm-docker-cluster/Dockerfile
index 95d0618..8d46943 100644
--- a/slurm-docker-cluster/Dockerfile
+++ b/slurm-docker-cluster/Dockerfile
@@ -38,15 +38,6 @@ RUN set -xe \
         /var/log/slurm \
         /data \
         /run/munge \
-    && touch /var/lib/slurmd/node_state \
-        /var/lib/slurmd/front_end_state \
-        /var/lib/slurmd/job_state \
-        /var/lib/slurmd/resv_state \
-        /var/lib/slurmd/trigger_state \
-        /var/lib/slurmd/assoc_mgr_state \
-        /var/lib/slurmd/assoc_usage \
-        /var/lib/slurmd/qos_usage \
-        /var/lib/slurmd/fed_mgr_state \
     && chown -R slurm:slurm /var/*/slurm* \
     && chown -R munge:munge /run/munge
 

From cb54f5ef518b893687daff5d4fa225f351942bfb Mon Sep 17 00:00:00 2001
From: Robert Oleynik
Date: Wed, 30 Apr 2025 15:57:46 +0200
Subject: [PATCH 9/9] also restart slurmd

---
 slurm-docker-cluster/docker-entrypoint.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/slurm-docker-cluster/docker-entrypoint.sh b/slurm-docker-cluster/docker-entrypoint.sh
index f04e2cb..8622d0a 100755
--- a/slurm-docker-cluster/docker-entrypoint.sh
+++ b/slurm-docker-cluster/docker-entrypoint.sh
@@ -38,7 +38,7 @@ then
     echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
     while true; do
         setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmctld -Dvvv
-        echo "slurmctl: exited with $?"
+        echo "---> slurmctl: exited with $?"
     done
 fi
 
@@ -57,7 +57,10 @@ then
     echo "-- slurmctld is now active ..."
 
     echo "---> Starting the Slurm Node Daemon (slurmd) ..."
-    exec /usr/sbin/slurmd -Dvvv
+    while true; do
+        /usr/sbin/slurmd -Dvvv
+        echo "---> slurmd: exited with $?"
+    done
 fi
 
 exec "$@"