Skip to content

Port Slurm Docker Cluster to Debian #67

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 35 additions & 92 deletions slurm-docker-cluster/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,108 +1,51 @@
# Base image for the RPM-based build of the Slurm cluster image.
FROM rockylinux:9.2

# Locale exported to every process in the image. The matching langpack
# (glibc-langpack-en) is installed by the dnf step further down.
ENV LC_ALL=en_US.utf8
ENV LANG=en_US.utf8

# Image metadata. The description names the distribution actually used by
# the FROM line above (it previously still said "CentOS 7").
LABEL org.opencontainers.image.source="https://github.com/giovtorres/slurm-docker-cluster" \
      org.opencontainers.image.title="slurm-docker-cluster" \
      org.opencontainers.image.description="Slurm Docker cluster on Rocky Linux 9" \
      org.label-schema.docker.cmd="docker-compose up -d" \
      maintainer="Giovanni Torres"

# Build-time knobs:
#   SLURM_TAG    - git tag of SchedMD/slurm that gets cloned and built.
#   GOSU_VERSION - gosu release downloaded from GitHub.
ARG SLURM_TAG=slurm-22-05-9-1
ARG GOSU_VERSION=1.11

# Copy the uv binary out of its pinned upstream image.
COPY --from=ghcr.io/astral-sh/uv:0.4.20 /uv /bin/uv

# Install the build toolchain and runtime dependencies in a single layer.
# - EPEL and CRB are enabled before the main install so packages from those
#   repositories can be resolved.
# - Every package name is followed by " \" (with a space) so that two names
#   can never be fused together by the line continuation.
# - The package cache is removed in the same layer that created it; dnf keeps
#   its cache under /var/cache/dnf (the legacy yum path is cleaned as well).
RUN set -ex \
    && dnf makecache \
    && dnf -y update \
    && dnf -y install dnf-plugins-core https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \
    && dnf config-manager --enable epel \
    && dnf config-manager --enable crb \
    && dnf -y install \
        wget \
        bzip2 \
        perl \
        gcc \
        gcc-c++ \
        git \
        glibc-langpack-en \
        gnupg \
        make \
        munge \
        munge-devel \
        python39 \
        python3-devel \
        python3-pip \
        mariadb-server \
        mariadb-devel \
        psmisc \
        slurm-contribs \
        bash-completion \
        vim-enhanced \
    && dnf clean all \
    && rm -rf /var/cache/dnf /var/cache/yum \
    # Provide an unversioned "python" command.
    && ln -s /usr/bin/python3 /usr/bin/python



# Download gosu and verify its detached signature before trusting the binary.
# - The signing key is fetched into a throw-away GNUPGHOME so nothing leaks
#   into the image's default keyring.
# - Several keyservers are tried in order; the retired SKS pool was dropped
#   from the list and keys.openpgp.org is tried first.
# - --batch keeps gpg from ever waiting on a terminal during the build.
# - gpg helper daemons are stopped before the temporary home is deleted so
#   they do not hold files open inside it.
RUN set -ex \
    && wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" \
    && wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc" \
    && export GNUPGHOME="$(mktemp -d)" \
    # See https://github.com/tianon/gosu/issues/17#issuecomment-348464529
    && ( gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
        || gpg --batch --keyserver keyserver.ubuntu.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
        || gpg --batch --keyserver pgp.mit.edu --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
        || gpg --batch --keyserver keyserver.pgp.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
    ) \
    && gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
    && ( gpgconf --kill all || true ) \
    && rm -rf "${GNUPGHOME}" /usr/local/bin/gosu.asc \
    && chmod +x /usr/local/bin/gosu \
    # Smoke test: the binary must be able to switch to an unprivileged user.
    && gosu nobody true

RUN set -x \
&& git clone --depth 1 --branch $SLURM_TAG https://github.com/SchedMD/slurm.git \
&& pushd slurm \
&& ./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm \
--with-mysql_config=/usr/bin --libdir=/usr/lib64 \
&& make install \
&& install -D -m644 etc/cgroup.conf.example /etc/slurm/cgroup.conf.example \
&& install -D -m644 etc/slurm.conf.example /etc/slurm/slurm.conf.example \
&& install -D -m644 etc/slurmdbd.conf.example /etc/slurm/slurmdbd.conf.example \
&& install -D -m644 contribs/slurm_completion_help/slurm_completion.sh /etc/profile.d/slurm_completion.sh \
&& popd \
&& rm -rf slurm \
&& groupadd -r --gid=1001 slurm \
&& useradd -r -g slurm --uid=1001 slurm \
&& mkdir /etc/sysconfig/slurm \
# Tag of the upstream uv image; declared ahead of the FROM that consumes it.
ARG UV_VERSION=0.7.0

# Helper stage that only serves as the source of the uv binary.
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv

# Final image.
FROM debian:bookworm

# Build-time parameters for this stage:
#   SLURM_VERSION        - release of the prebuilt Slurm .deb packages
#   SLURM_USER_UID / GID - numeric ids given to the slurm account
#   MYSQL_CONFIG_VERSION - version of the mysql-apt-config package
ARG SLURM_VERSION=24.05.4
ARG SLURM_USER_UID=990
ARG SLURM_USER_GID=990
ARG MYSQL_CONFIG_VERSION=0.8.34

# Bring the uv binary over from the helper stage.
COPY --from=uv /uv /bin/uv

# Bootstrap tooling needed to download and register third-party packages.
# The apt lists are removed in the layer that fetched them; every later step
# that needs them runs its own `apt-get update`.
RUN set -xe \
    && apt-get update \
    && apt-get install -y curl gnupg lsb-release wget \
    && rm -rf /var/lib/apt/lists/*

# Register the MySQL APT repository and install the server from it.
# - `curl -f` turns an HTTP error into a build failure instead of saving the
#   error page as a .deb.
# - DEBIAN_FRONTEND is set inline (never via ENV) so package configuration
#   cannot block on a prompt during the build and the setting does not leak
#   into the runtime environment.
# - The downloaded package is deleted in the same layer that fetched it.
RUN set -xe \
    && curl -fLo /tmp/mysql-apt-config.deb "https://dev.mysql.com/get/mysql-apt-config_${MYSQL_CONFIG_VERSION}-1_all.deb" \
    && DEBIAN_FRONTEND=noninteractive dpkg -i /tmp/mysql-apt-config.deb \
    && DEBIAN_FRONTEND=noninteractive dpkg-reconfigure mysql-apt-config \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install --yes mysql-server \
    && rm -rf /tmp/mysql-apt-config.deb /var/lib/apt/lists/*

# Download the prebuilt Slurm packages and install them together so apt can
# resolve their mutual and external dependencies in one transaction.
# - `|| exit 1` is required: inside an && chain `set -e` does not abort on a
#   failing command in the loop body.
# - Downloads and apt lists are removed in this same layer; deleting them in
#   a later layer would not shrink the image.
RUN set -xe \
    && for package in smd smd-client smd-slurmd smd-slurmctld smd-slurmdbd smd-sview; do \
        curl -fLo "/tmp/slurm-${package}.deb" \
            "https://github.com/scalableminds/slurm-packages/releases/download/${SLURM_VERSION}/slurm-${package}_${SLURM_VERSION}-1_amd64.deb" \
            || exit 1; \
    done \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install --yes -f /tmp/slurm-*.deb \
    && rm -rf /tmp/* /var/lib/apt/lists/*

# Create the slurm account and the directories/state files used by the Slurm
# daemons, then prepare munge.
# - /run/munge is created with mkdir, not touch: it has to be a directory
#   (munged places its socket inside it); touch would create a regular file
#   whenever the path does not exist yet.
# - Every chained command ends with " \" so the whole sequence stays inside
#   this single RUN instruction.
RUN set -xe \
    && addgroup --gid="$SLURM_USER_GID" slurm \
    && adduser --system --uid="$SLURM_USER_UID" --ingroup slurm slurm \
    && mkdir -p /etc/sysconfig/slurm \
        /var/spool/slurmd \
        /var/run/slurmd \
        /var/run/slurmdbd \
        /var/lib/slurmd \
        /var/log/slurm \
        /data \
        /run/munge \
    && touch /var/lib/slurmd/node_state \
        /var/lib/slurmd/front_end_state \
        /var/lib/slurmd/job_state \
        /var/lib/slurmd/resv_state \
        /var/lib/slurmd/trigger_state \
        /var/lib/slurmd/assoc_mgr_state \
        /var/lib/slurmd/assoc_usage \
        /var/lib/slurmd/qos_usage \
        /var/lib/slurmd/fed_mgr_state \
    && chown -R slurm:slurm /var/*/slurm* \
    # NOTE(review): confirm /sbin/create-munge-key is shipped by the munge
    # package on this base image; if it is not, this step has to be replaced
    # by that package's own key-generation mechanism.
    && /sbin/create-munge-key \
    && chown -R munge:munge /run/munge

# Ship both Slurm configuration files, owned by the slurm account.
COPY --chown=slurm:slurm slurm.conf slurmdbd.conf /etc/slurm/

# Restrict both files to their owner.
RUN chmod 600 /etc/slurm/slurm.conf /etc/slurm/slurmdbd.conf

# Wrapper script that receives the role name as its first argument.
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh

ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

# Default role when the container is started without arguments.
CMD ["slurmdbd"]
20 changes: 13 additions & 7 deletions slurm-docker-cluster/docker-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,27 @@ set -e
# Role "slurmdbd" (selected by the first argument): start munged, wait until
# the database answers a trivial query, then run slurmdbd in the foreground.
if [ "$1" = "slurmdbd" ]
then
echo "---> Starting the MUNGE Authentication service (munged) ..."
# NOTE(review): the next two lines both launch munged as the munge user, once
# through gosu and once through setpriv. They look like the before/after of
# one change; confirm that only one of them is meant to remain.
gosu munge /usr/sbin/munged
setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged

echo "---> Starting the Slurm Database Daemon (slurmdbd) ..."

{
# The configuration file is sourced as shell code; the loop below reads
# StorageHost, StorageUser and StoragePass, presumably assigned by that file.
. /etc/slurm/slurmdbd.conf
# NOTE(review): two `until` heads follow (unquoted and quoted variables); they
# look like the before/after of one change; confirm only one should remain.
# Redirect order: `2>&1 > /dev/null` points stderr at the current stdout first
# and only then discards stdout, so client error messages remain visible.
until echo "SELECT 1" | mysql -h $StorageHost -u$StorageUser -p$StoragePass 2>&1 > /dev/null
until echo "SELECT 1" | mysql -h "$StorageHost" -u"$StorageUser" -p"$StoragePass" 2>&1 > /dev/null
do
echo "-- Waiting for database to become active ..."
sleep 2
done
}
echo "-- Database is now active ..."

# exec replaces this shell with slurmdbd, running as the slurm user.
# NOTE(review): gosu and setpriv variants again appear side by side; the first
# exec that succeeds makes the second line unreachable.
exec gosu slurm /usr/sbin/slurmdbd -Dvvv
exec setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmdbd -Dvvv
fi

if [ "$1" = "slurmctld" ]
then
echo "---> Starting the MUNGE Authentication service (munged) ..."
gosu munge /usr/sbin/munged
setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged

echo "---> Waiting for slurmdbd to become active before starting slurmctld ..."

Expand All @@ -36,13 +36,16 @@ then
echo "-- slurmdbd is now active ..."

echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
exec gosu slurm /usr/sbin/slurmctld -Dvvv
# Keep slurmctld running: start it again every time it exits.
while true; do
    # Capture the exit status through `|| rc=$?`. The script runs under
    # `set -e`, so a bare non-zero exit would abort the whole script before
    # the status could be reported or the daemon restarted.
    rc=0
    setpriv --reuid=slurm --regid=slurm --init-groups /usr/sbin/slurmctld -Dvvv || rc=$?
    echo "---> slurmctld: exited with $rc"
    # Short pause so a daemon that fails immediately does not spin the CPU
    # and flood the log.
    sleep 1
done
fi

if [ "$1" = "slurmd" ]
then
echo "---> Starting the MUNGE Authentication service (munged) ..."
gosu munge /usr/sbin/munged
setpriv --reuid=munge --regid=munge --init-groups /usr/sbin/munged

echo "---> Waiting for slurmctld to become active before starting slurmd..."

Expand All @@ -54,7 +57,10 @@ then
echo "-- slurmctld is now active ..."

echo "---> Starting the Slurm Node Daemon (slurmd) ..."
exec /usr/sbin/slurmd -Dvvv
# Keep slurmd running: start it again every time it exits.
while true; do
    # Capture the exit status through `|| rc=$?`. The script runs under
    # `set -e`, so a bare non-zero exit would abort the whole script before
    # the status could be reported or the daemon restarted.
    rc=0
    /usr/sbin/slurmd -Dvvv || rc=$?
    echo "---> slurmd: exited with $rc"
    # Short pause so a daemon that fails immediately does not spin the CPU
    # and flood the log.
    sleep 1
done
fi

exec "$@"