Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions .github/workflows/build_and_run_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,32 +47,38 @@ jobs:
res: [ ne30_ne30_mg17 ]
dycore: [ theta-l, theta-l_kokkos ]
image: [
opensuse15_gcc12_openmpi5.0.8_cpu:970927a,
opensuse15_gcc12_openmpi5.0.8_cuda12.9:970927a,
almalinux9.6_intel2024_openmpi5.0.8_cpu:970927a,
ubuntu24.04_nvhpc25.7_openmpi4.1.5_cpu:970927a,
ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9:970927a
opensuse15_gcc12_openmpi5.0.8_cpu:e9f55f1,
opensuse15_gcc12_openmpi5.0.8_cuda12.9:e9f55f1,
almalinux9.6_intel2024_openmpi5.0.8_cpu:e9f55f1,
ubuntu24.04_nvhpc25.7_openmpi4.1.5_cpu:e9f55f1,
ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9:e9f55f1,
ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9:e9f55f1
]
include:
- image: opensuse15_gcc12_openmpi5.0.8_cpu:970927a
- image: opensuse15_gcc12_openmpi5.0.8_cpu:e9f55f1
runner: gha-runner-stormspeed
compiler: gnu
- image: almalinux9.6_intel2024_openmpi5.0.8_cpu:970927a
- image: almalinux9.6_intel2024_openmpi5.0.8_cpu:e9f55f1
runner: gha-runner-stormspeed
compiler: intel
- image: ubuntu24.04_nvhpc25.7_openmpi4.1.5_cpu:970927a
- image: ubuntu24.04_nvhpc25.7_openmpi4.1.5_cpu:e9f55f1
runner: gha-runner-stormspeed
compiler: nvhpc
- image: opensuse15_gcc12_openmpi5.0.8_cuda12.9:970927a
- image: opensuse15_gcc12_openmpi5.0.8_cuda12.9:e9f55f1
runner: gha-runner-gpu-stormspeed
compiler: gnu
- image: ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9:970927a
- image: ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9:e9f55f1
runner: gha-runner-gpu-stormspeed
compiler: nvhpc
- image: ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9:e9f55f1
runner: gha-runner-gpu-stormspeed
compiler: nvhpc
exclude:
- image: opensuse15_gcc12_openmpi5.0.8_cuda12.9:970927a
- image: opensuse15_gcc12_openmpi5.0.8_cuda12.9:e9f55f1
dycore: theta-l
- image: ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9:e9f55f1
dycore: theta-l
- image: ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9:970927a
- image: ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9:e9f55f1
dycore: theta-l
runs-on: ${{ matrix.runner }}
container:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/build_image_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jobs:
- Dockerfile.opensuse15_gcc12_openmpi5.0.8_cpu
- Dockerfile.opensuse15_gcc12_openmpi5.0.8_cuda12.9
- Dockerfile.ubuntu24.04_nvhpc25.7_openmpi4.1.5_cpu
- Dockerfile.ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9
- Dockerfile.ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9
include:
- dockerfile: Dockerfile.almalinux9.6_intel2024_openmpi5.0.8_cpu
Expand All @@ -34,6 +35,8 @@ jobs:
runner: gha-runner-stormspeed
- dockerfile: Dockerfile.opensuse15_gcc12_openmpi5.0.8_cuda12.9
runner: gha-runner-gpu-stormspeed
- dockerfile: Dockerfile.ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9
runner: gha-runner-gpu-stormspeed
- dockerfile: Dockerfile.ubuntu24.04_nvhpc26.1_openmpi5.0.8_cuda12.9
runner: gha-runner-gpu-stormspeed
steps:
Expand Down
14 changes: 7 additions & 7 deletions dockerfiles/Dockerfile.almalinux9.6_intel2024_openmpi5.0.8_cpu
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2
rm -rf awscliv2.zip aws && \
aws --version

# Clone Spack with tag v1.0.0
RUN git clone --depth=2 --branch v1.0.0 https://github.com/spack/spack.git
# Clone Spack with tag v1.1.0
RUN git clone --depth=2 --branch v1.1.0 https://github.com/spack/spack.git

########################
# Build intel compiler #
Expand All @@ -49,7 +49,7 @@ RUN . /opt/spack/share/spack/setup-env.sh && \
update-alternatives --install /usr/bin/icx icx $(spack location -i [email protected])/compiler/2024.2/bin/icx 120 && \
update-alternatives --install /usr/bin/ifort ifort $(spack location -i [email protected])/compiler/2024.2/bin/ifort 120 && \
mkdir -p /usr/local/intel-oneapi-compiler && \
ln -s $(spack location -i [email protected])/compiler/2024.2 /usr/local/intel-oneapi-compiler/2024.2
ln -s $(spack location -i [email protected]) /usr/local/intel-oneapi-compilers

########################
# Build libxml2, cmake #
Expand Down Expand Up @@ -79,12 +79,12 @@ packages:
buildable: false
externals:
- spec: [email protected] languages:='c,c++,fortran'
prefix: /usr/local/intel-oneapi-compiler/2024.2
prefix: /usr/local/intel-oneapi-compilers
extra_attributes:
compilers:
c: /usr/local/intel-oneapi-compiler/2024.2/bin/icx
cxx: /usr/local/intel-oneapi-compiler/2024.2/bin/icpx
fortran: /usr/local/intel-oneapi-compiler/2024.2/bin/ifort
c: /usr/local/intel-oneapi-compilers/compiler/2024.2/bin/icx
cxx: /usr/local/intel-oneapi-compilers/compiler/2024.2/bin/icpx
fortran: /usr/local/intel-oneapi-compilers/compiler/2024.2/bin/ifort
gcc:
externals:
- spec: [email protected] languages:='c,c++,fortran'
Expand Down
4 changes: 2 additions & 2 deletions dockerfiles/Dockerfile.opensuse15_gcc12_openmpi5.0.8_cpu
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2
rm -rf awscliv2.zip aws && \
aws --version

# Clone Spack with tag v1.0.0
RUN git clone --depth=2 --branch v1.0.0 https://github.com/spack/spack.git
# Clone Spack with tag v1.1.0
RUN git clone --depth=2 --branch v1.1.0 https://github.com/spack/spack.git

########################
# Build libxml2, cmake #
Expand Down
211 changes: 211 additions & 0 deletions dockerfiles/Dockerfile.ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
#############################################################################################################
# Base image #
# - Do not try to install nvhpc and cuda from source; #
# - It may not build some packages below and you need to switch back to gcc compiler; #
# - NVIDIA also does not provide OpenSUSE image; use Ubuntu instead and good to test a different OS system. #
#############################################################################################################
# NOTE(review): many spack specs below read literally "[email protected]" and the YAML
# heredocs are flush-left. Both look like artifacts of text extraction (version
# strings redacted by email obfuscation, indentation stripped). Confirm every
# spec and the heredoc indentation against the canonical Dockerfile before
# building -- flat YAML inside the heredocs would not parse.
FROM nvcr.io/nvidia/nvhpc:25.7-devel-cuda12.9-ubuntu24.04 AS spack_base

# Suppress interactive prompts from apt during the image build
ENV DEBIAN_FRONTEND=noninteractive

# Run every subsequent RUN step under bash (spack's setup-env.sh is sourced with '.')
SHELL ["/bin/bash", "-c"]

WORKDIR /opt

# Install python3 (needed for git-fleximod) as prerequisite for Spack
# This image also comes with gcc/13.3.0 compiler
RUN apt-get update && \
apt-get install -y unzip curl pkg-config && \
apt-get install -y software-properties-common && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.13 python3.13-dev python3.13-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Make the python/python3 commands resolve to the freshly installed 3.13
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.13 80 && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 80

# Install AWS CLI v2
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
unzip awscliv2.zip && \
./aws/install && \
rm -rf awscliv2.zip aws && \
aws --version

# Link CUDA
# Expose the SDK's bundled CUDA and math libraries at the conventional
# /usr/local paths used by the ENV block at the bottom of this file
RUN ln -s /opt/nvidia/hpc_sdk/Linux_x86_64/25.7/cuda /usr/local/cuda && \
ln -s /opt/nvidia/hpc_sdk/Linux_x86_64/25.7/math_libs /usr/local/math_libs

# Clean up unused packages
RUN cd /opt/nvidia/hpc_sdk/Linux_x86_64/25.7 && \
rm -rf profilers

# Clone Spack with tag v1.1.0
# --depth=2 keeps the clone shallow; same pattern as the sibling dockerfiles in this repo
RUN git clone --depth=2 --branch v1.1.0 https://github.com/spack/spack.git

#################
# Build libxml2 #
#################
FROM spack_base AS xml2

# Install xmllint that is required by CIME
# This image comes with cmake/3.28.3
# The xmllint built by NVHPC does not work; so we switch to gcc
RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] %gcc && \
ln -s $(spack location -i [email protected])/bin/xmllint /usr/local/bin/xmllint

#################
# Build openmpi #
#################
FROM xml2 AS openmpi

# Avoid building CUDA since it is already provided by the base image
# Also set the nvhpc compilers as externals to avoid building them from source
# (cuda/nvhpc are marked buildable: false so spack always uses the SDK copies;
# gcc/13.3.0 from the base image is registered as an external compiler too)
RUN cat <<'EOF' > /root/.spack/packages.yaml
packages:
cuda:
buildable: false
externals:
- spec: [email protected]
prefix: /opt/nvidia/hpc_sdk/Linux_x86_64/25.7/cuda
nvhpc:
buildable: false
externals:
- spec: [email protected] languages:='c,c++,fortran'
prefix: /opt/nvidia/hpc_sdk/Linux_x86_64/25.7/compilers
extra_attributes:
compilers:
c: /opt/nvidia/hpc_sdk/Linux_x86_64/25.7/compilers/bin/nvc
cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/25.7/compilers/bin/nvc++
fortran: /opt/nvidia/hpc_sdk/Linux_x86_64/25.7/compilers/bin/nvfortran
gcc:
externals:
- spec: [email protected] languages:='c,c++,fortran'
prefix: /usr
extra_attributes:
compilers:
c: /usr/bin/gcc
cxx: /usr/bin/g++
fortran: /usr/bin/gfortran
EOF

# Tell Spack to scan the OS for Python and never compile it from source
RUN . /opt/spack/share/spack/setup-env.sh && \
spack external find --not-buildable python

# Skip installing ucx as NVHPC can not build it
# NOTE(review): the spec below requests fabrics=cma,ucx,ofi, which appears to
# include ucx despite the comment above -- confirm which is intended.
# cuda_arch=86 targets sm_86 (Ampere-class) GPUs; adjust for other hardware.
RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] fabrics=cma,ucx,ofi +cuda cuda_arch=86 %[email protected] && \
ln -s $(spack location -i [email protected]) /usr/local/openmpi

#########################
# Build parallel-netcdf #
#########################
FROM openmpi AS parallel_netcdf

# Build pnetcdf with the nvhpc compiler against the openmpi installed above
RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] +cxx +fortran +pic +shared %[email protected] ^[email protected] && \
ln -s $(spack location -i [email protected]) /usr/local/pnetcdf

################
# Build netCDF #
################
FROM parallel_netcdf AS netcdf

RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] %[email protected] ^[email protected] ^[email protected] && \
ln -s $(spack location -i [email protected]) /usr/local/netcdf_c

# Install netcdf-fortran; it will install the netcdf-c and openmpi dependency by default
RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] %[email protected] ^[email protected] ^[email protected] && \
ln -s $(spack location -i [email protected]) /usr/local/netcdf_fortran

# Symlink the netcdf-c and netcdf-fortran installation directories to /usr/local so that CMake find_package works in a CAM build
RUN . /opt/spack/share/spack/setup-env.sh && \
spack view symlink -i /usr/local netcdf-c netcdf-fortran

#############
# Build PIO #
#############
FROM netcdf AS parallelio

# Install parallelio; it will install the parallel-netcdf dependency by default
RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] +pnetcdf +fortran +mpi +shared +ncint %[email protected] ^[email protected] ^[email protected] && \
ln -s $(spack location -i [email protected]) /usr/local/pio

##############
# Build ESMF #
##############
FROM parallelio AS esmf

# Use toolchains mechanism to set the NVHPC compilers as the default compilers for ESMF
# (defines a named toolchain "nvhpc-257" referenced by the %nvhpc-257 spec below)
RUN cat <<'EOF' > /root/.spack/toolchains.yaml
toolchains:
nvhpc-257:
- spec: '%[email protected]'
when: '%c'
- spec: '%[email protected]'
when: '%cxx'
- spec: '%[email protected]'
when: '%fortran'
EOF

# Install ESMF; it will install its parallelio and parallel-netcdf dependencies by default
# NOTE(review): esmf is installed twice -- once plainly, then again with the
# nvhpc toolchain after patching its package.py; presumably the first install
# stages dependencies and/or the package recipe. Confirm the intent.
RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] && \
# Patch the ESMF package.py to treat NVHPC as PGI instead of throwing an error
sed -i 's/raise InstallError(msg.format(self.pkg.compiler.name))/env.set("ESMF_COMPILER", "pgi")/g' $(spack location --package-dir esmf)/package.py && \
spack install "[email protected] +pnetcdf +mpi +shared %nvhpc-257 ^[email protected] ^[email protected] ^[email protected]" && \
ln -s $(spack location -i [email protected]) /usr/local/esmf

################
# Build LAPACK #
################
FROM esmf AS final_image

RUN . /opt/spack/share/spack/setup-env.sh && \
spack install [email protected] %[email protected] && \
ln -s $(spack location -i [email protected]) /usr/local/lapack

################################################
# Set up environment variables for CI workflow #
################################################
# Default compilers for the CI build system
ENV CXX=nvc++
ENV CC=nvc
ENV FC=nvfortran
# Library roots; these point at the symlinks created in the stages above
ENV MPI_ROOT="/usr/local/openmpi"
ENV NETCDF_C_PATH="/usr/local/netcdf_c"
ENV NETCDF_FORTRAN_PATH="/usr/local/netcdf_fortran"
ENV PNETCDF="/usr/local/pnetcdf"
ENV PIO="/usr/local/pio"
ENV ESMFMKFILE="/usr/local/esmf/lib/esmf.mk"
ENV LAPACK="/usr/local/lapack"
ENV PIO_VERSION_MAJOR=2
ENV PIO_TYPENAME_VALID_VALUES="netcdf, pnetcdf, netcdf4c, netcdf4p"
ENV CUDA_ROOT="/usr/local/cuda"
ENV CUBLAS_ROOT="/usr/local/math_libs"
ENV PATH="/usr/local:/usr/local/bin:$MPI_ROOT/bin:$CUDA_ROOT/bin:${PATH}"
ENV USER=robot
# Set the default compiler for nvcc wrapper
ENV NVCC_WRAPPER_DEFAULT_COMPILER=nvc++
# Make sure nvc++ can find cuda headers
# NOTE(review): if CPATH/CPLUS_INCLUDE_PATH are unset in the base image these
# expand to a trailing ':', which some compilers treat as "also search CWD" --
# confirm this is acceptable.
ENV CPATH="${CUDA_ROOT}/include:${CPATH}"
ENV CPLUS_INCLUDE_PATH="${CUDA_ROOT}/include:${CPLUS_INCLUDE_PATH}"
# Force OpenMPI wrappers to use the NVHPC backend compilers
ENV OMPI_CC=nvc
ENV OMPI_CXX=nvc++
ENV OMPI_FC=nvfortran

##########################
# Miscellaneous settings #
##########################
WORKDIR /tmp

LABEL maintainer="Jian Sun"
LABEL description="Ubuntu 24.04 container with nvhpc/25.7 and cuda/12.9 software stack for StormSPEED CAM"

CMD ["/bin/bash"]