diff --git a/tests/utils/R/rscript.py b/tests/utils/R/rscript.py
index 18b829d4..1d5a2003 100644
--- a/tests/utils/R/rscript.py
+++ b/tests/utils/R/rscript.py
@@ -29,7 +29,7 @@ class RscriptInstall(RscriptBase):
     """Tests installing packages with R on the login nodes"""
 
     descr = "Tests that R packages can be installed locally. Requires internet access."
-    valid_systems = ["archer2:login"]
+    valid_systems = ["archer2:login", "cirrus-ex:login"]
     local = True
     executable_opts = ["install_benchmark_packages.R"]
     libs_path = None
@@ -61,7 +61,7 @@ class RscriptRun(RscriptBase):
     Uses packages installed locally in a previous test.
     """
 
-    valid_systems = ["archer2:login", "archer2:compute"]
+    valid_systems = ["archer2:login", "archer2:compute", "cirrus-ex:login", "cirrus-ex:compute"]
     executable_opts = ["run_benchmark.R"]
     library = fixture(RscriptInstall, scope="session")
     libs_path = None
diff --git a/tests/utils/containers/containers.py b/tests/utils/containers/containers.py
new file mode 100644
index 00000000..31f5fff0
--- /dev/null
+++ b/tests/utils/containers/containers.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+"""
+Containerised OSU benchmarks
+
+These tests check that containers can be run with MPI. Basic performance checks are also included.
+"""
+
+import os
+
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+class PullOSUContainerARCHER2(rfm.RunOnlyRegressionTest):
+    """Pull a container containing an osu benchmark - GLIBC compatible with ARCHER2 OS"""
+
+    descr = "Pull an OSU benchmark container from github "
+    valid_systems = ["archer2:login"]
+    valid_prog_environs = ["PrgEnv-gnu"]
+    executable = "singularity"
+    image_name = "archer2_osu"
+    executable_opts = ["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}"]
+    local = True
+
+    @sanity_function
+    def validate_download(self):
+        """Check that the pull printed no "error" text to stderr"""
+        return sn.assert_not_found("error", self.stderr)
+
+
+class PullOSUContainerCirrusEX(rfm.RunOnlyRegressionTest):
+    """Pull a container containing an osu benchmark - GLIBC compatible with CirrusEX OS"""
+
+    descr = "Pull an OSU benchmark container from github "
+    valid_systems = ["cirrus-ex:login"]
+    valid_prog_environs = ["PrgEnv-gnu"]
+    executable = "apptainer"
+    image_name = "osu-benchmarks"
+    image_version = "7.5.1"
+    executable_opts = ["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}:{image_version}"]
+    local = True
+
+    @sanity_function
+    def validate_download(self):
+        """Check that the pull printed no "error" text to stderr"""
+        return sn.assert_not_found("error", self.stderr)
+
+
+@rfm.simple_test
+class OSUContainerTestARCHER2(rfm.RunOnlyRegressionTest):
+    """Run the OSU allreduce benchmark in a singularity container on ARCHER2"""
+
+    descr = "OSU benchmarks in a container"
+    osu_container = fixture(PullOSUContainerARCHER2, scope="session")
+    valid_systems = ["archer2:compute"]
+    valid_prog_environs = ["PrgEnv-gnu"]
+    num_tasks = 256
+    num_tasks_per_node = 128
+    num_cpus_per_task = 1
+    time_limit = "5m"
+
+    env_vars = {
+        "OMP_NUM_THREADS": str(num_cpus_per_task),
+        "OMP_PLACES": "cores",
+        "SINGULARITYENV_LD_LIBRARY_PATH": "/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib-abi-mpich:\
+/opt/cray/pe/mpich/8.1.23/gtl/lib:/opt/cray/libfabric/1.12.1.2.2.0.0/lib64:\
+/opt/cray/pe/gcc-libs:/opt/cray/pe/gcc-libs:/opt/cray/pe/lib64:/opt/cray/pe/lib64:\
+/opt/cray/xpmem/default/lib64:/usr/lib64/libibverbs:/usr/lib64:/usr/lib64",
+        "SINGULARITY_BIND": "/opt/cray,/var/spool,\
+/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib-abi-mpich:\
+/opt/cray/pe/mpich/8.1.23/gtl/lib,/etc/host.conf,\
+/etc/libibverbs.d/mlx5.driver,/etc/libnl/classid,\
+/etc/resolv.conf,/opt/cray/libfabric/1.12.1.2.2.0.0/lib64/libfabric.so.1,\
+/opt/cray/pe/gcc-libs/libatomic.so.1,/opt/cray/pe/gcc-libs/libgcc_s.so.1,\
+/opt/cray/pe/gcc-libs/libgfortran.so.5,/opt/cray/pe/gcc-libs/libquadmath.so.0,\
+/opt/cray/pe/lib64/libpals.so.0,/opt/cray/pe/lib64/libpmi2.so.0,\
+/opt/cray/pe/lib64/libpmi.so.0,/opt/cray/xpmem/default/lib64/libxpmem.so.0,\
+/run/munge/munge.socket.2,/usr/lib64/libibverbs/libmlx5-rdmav34.so,\
+/usr/lib64/libibverbs.so.1,/usr/lib64/libkeyutils.so.1,/usr/lib64/liblnetconfig.so.4,\
+/usr/lib64/liblustreapi.so,/usr/lib64/libmunge.so.2,/usr/lib64/libnl-3.so.200,\
+/usr/lib64/libnl-genl-3.so.200,/usr/lib64/libnl-route-3.so.200,/usr/lib64/librdmacm.so.1,\
+/usr/lib64/libyaml-0.so.2",
+    }
+
+    reference = {
+        "archer2:compute": {"latency_big": (2200, -0.02, 0.30, "us"), "latency_small": (8.4, -0.05, 0.30, "us")}
+    }
+
+    @require_deps
+    def set_singularity_invoke(self):
+        """Builds the singularity command to be passed to srun"""
+        self.executable = "singularity"
+
+        self.executable_opts = [
+            "run",
+            os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_latest.sif"),
+            "osu_allreduce",
+        ]
+
+    @performance_function("us")
+    def latency_big(self):
+        """Extract the latency reported for message size 1048576 in the OSU test"""
+        return sn.extractsingle(r"^1048576\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)
+
+    @performance_function("us")
+    def latency_small(self):
+        """Extract the latency reported for message size 4 in the OSU test"""
+        return sn.extractsingle(r"^4\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)
+
+    @sanity_function
+    def validate_job_run(self):
+        """Check that the OSU allreduce banner appears in stdout"""
+        return sn.assert_found("OSU MPI Allreduce Latency Test ", self.stdout)
+
+
+@rfm.simple_test
+class OSUContainerTestCirrusEX(rfm.RunOnlyRegressionTest):
+    """Run the OSU allreduce benchmark in an apptainer container on Cirrus EX"""
+
+    descr = "OSU benchmarks in a container"
+    osu_container = fixture(PullOSUContainerCirrusEX, scope="session")
+    valid_systems = ["cirrus-ex:compute"]
+    valid_prog_environs = ["PrgEnv-gnu"]
+    num_tasks = 576
+    num_tasks_per_node = 288
+    num_cpus_per_task = 1
+    time_limit = "10m"
+
+    env_vars = {
+        "OMP_NUM_THREADS": str(num_cpus_per_task),
+        "OMP_PLACES": "cores",
+        "APPTAINERENV_LD_LIBRARY_PATH": "/opt/cray/pe/mpich/8.1.32/ofi/gnu/11.2/lib-abi-mpich:\
+/opt/cray/libfabric/1.22.0/lib64:\
+/opt/cray/pals/1.6/lib:\
+/opt/cray/pe/lib64:\
+/opt/xpmem/lib64:/lib64",
+        "APPTAINER_BIND": "/opt/cray,/var/spool,\
+/opt/cray/pe/mpich/8.1.32/ofi/gnu/11.2/lib-abi-mpich,\
+/etc/host.conf,/etc/libibverbs.d/mlx5.driver,\
+/etc/libnl/classid,\
+/etc/resolv.conf,\
+/opt/cray/libfabric/1.22.0/lib64/libfabric.so.1,\
+/lib64/libatomic.so.1,\
+/lib64/libgcc_s.so.1,/lib64/libgfortran.so.5,\
+/lib64/libquadmath.so.0,\
+/opt/cray/pals/1.6/lib/libpals.so.0,\
+/opt/cray/pe/lib64/libpmi2.so.0,\
+/opt/cray/pe/lib64/libpmi.so.0,\
+/opt/xpmem/lib64/libxpmem.so.0,\
+/run/munge/munge.socket.2,\
+/lib64/libmunge.so.2,\
+/lib64/libnl-3.so.200,\
+/lib64/libnl-genl-3.so.200,\
+/lib64/libnl-route-3.so.200,\
+/lib64/librdmacm.so.1,\
+/lib64/libcxi.so.1,\
+/lib64/libm.so.6",
+    }
+
+    reference = {
+        "cirrus-ex:compute": {"latency_big": (1100, -0.02, 0.30, "us"), "latency_small": (9.7, -0.05, 0.30, "us")}
+    }
+
+    @require_deps
+    def set_singularity_invoke(self):
+        """Builds the apptainer command to be passed to srun"""
+        self.executable = "apptainer"
+
+        self.executable_opts = [
+            "run",
+            os.path.join(
+                self.osu_container.stagedir,
+                self.osu_container.image_name + "_" + self.osu_container.image_version + ".sif",
+            ),
+            "osu_allreduce",
+        ]
+
+    @performance_function("us")
+    def latency_big(self):
+        """Extract the latency reported for message size 1048576 in the OSU test"""
+        return sn.extractsingle(r"^1048576\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)
+
+    @performance_function("us")
+    def latency_small(self):
+        """Extract the latency reported for message size 4 in the OSU test"""
+        return sn.extractsingle(r"^4\W+([0-9]+(?:\.[0-9]+)?)", self.stdout, 1, float)
+
+    @sanity_function
+    def validate_job_run(self):
+        """Check that the OSU allreduce banner appears in stdout"""
+        return sn.assert_found("OSU MPI Allreduce Latency Test ", self.stdout)
diff --git a/tests/utils/singularity/singularity.py b/tests/utils/singularity/singularity.py
deleted file mode 100644
index 1e10edd3..00000000
--- a/tests/utils/singularity/singularity.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python3
-"""
-Singularity OSU benchmarks
-
-These tests checks that singularity containers can be run with mpi. Basic performance checks are also included.
-"""
-
-import os
-
-import reframe as rfm
-import reframe.utility.sanity as sn
-
-
-class PullOSUContainer(rfm.RunOnlyRegressionTest):
-    """Pull a container containing an osu benchmark"""
-
-    descr = "Pill a osu benchmark container from github "
-    valid_systems = ["archer2:login"]
-    valid_prog_environs = ["PrgEnv-gnu"]
-    executable = "singularity"
-    image_name = "archer2_osu"
-    executable_opts = ["pull", f"docker://ghcr.io/epcced/epcc-reframe/{image_name}"]
-    local = True
-
-    @sanity_function
-    def validate_download(self):
-        """Sanity Check"""
-        return sn.assert_not_found("error", self.stderr)
-
-
-@rfm.simple_test
-class OSUContainerTest(rfm.RunOnlyRegressionTest):
-    """Run the OSU benchmark in a container"""
-
-    descr = "OSU benchmarks in a container"
-    osu_container = fixture(PullOSUContainer, scope="session")
-    valid_systems = ["archer2:compute"]
-    valid_prog_environs = ["PrgEnv-gnu"]
-    num_tasks = 256
-    num_tasks_per_node = 128
-    num_cpus_per_task = 1
-    time_limit = "5m"
-
-    env_vars = {
-        "OMP_NUM_THREADS": str(num_cpus_per_task),
-        "OMP_PLACES": "cores",
-        "SINGULARITYENV_LD_LIBRARY_PATH": "/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib-abi-mpich:\
-/opt/cray/pe/mpich/8.1.23/gtl/lib:/opt/cray/libfabric/1.12.1.2.2.0.0/lib64:\
-/opt/cray/pe/gcc-libs:/opt/cray/pe/gcc-libs:/opt/cray/pe/lib64:/opt/cray/pe/lib64:\
-/opt/cray/xpmem/default/lib64:/usr/lib64/libibverbs:/usr/lib64:/usr/lib64",
-        "SINGULARITY_BIND": "/opt/cray,/var/spool,\
-/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib-abi-mpich:\
-/opt/cray/pe/mpich/8.1.23/gtl/lib,/etc/host.conf,\
-/etc/libibverbs.d/mlx5.driver,/etc/libnl/classid,\
-/etc/resolv.conf,/opt/cray/libfabric/1.12.1.2.2.0.0/lib64/libfabric.so.1,\
-/opt/cray/pe/gcc-libs/libatomic.so.1,/opt/cray/pe/gcc-libs/libgcc_s.so.1,\
-/opt/cray/pe/gcc-libs/libgfortran.so.5,/opt/cray/pe/gcc-libs/libquadmath.so.0,\
-/opt/cray/pe/lib64/libpals.so.0,/opt/cray/pe/lib64/libpmi2.so.0,\
-/opt/cray/pe/lib64/libpmi.so.0,/opt/cray/xpmem/default/lib64/libxpmem.so.0,\
-/run/munge/munge.socket.2,/usr/lib64/libibverbs/libmlx5-rdmav34.so,\
-/usr/lib64/libibverbs.so.1,/usr/lib64/libkeyutils.so.1,/usr/lib64/liblnetconfig.so.4,\
-/usr/lib64/liblustreapi.so,/usr/lib64/libmunge.so.2,/usr/lib64/libnl-3.so.200,\
-/usr/lib64/libnl-genl-3.so.200,/usr/lib64/libnl-route-3.so.200,/usr/lib64/librdmacm.so.1,\
-/usr/lib64/libyaml-0.so.2",
-    }
-
-    reference = {
-        "archer2:compute": {"latency_big": (2200, -0.02, 0.30, "us"), "latency_small": (8.4, -0.05, 0.30, "us")}
-    }
-
-    @require_deps
-    def set_singularity_invoke(self):
-        """Builds the command to be passed to srun"""
-        self.executable = "singularity"
-
-        self.executable_opts = [
-            "run",
-            os.path.join(self.osu_container.stagedir, self.osu_container.image_name + "_latest.sif"),
-            "osu_allreduce",
-        ]
-
-    @performance_function("us")
-    def latency_big(self):
-        """Extract the latency from the largest size in the OSU test"""
-        return sn.extractsingle(r"^1048576\W+([0-9]+(?:.[0-9]+)?)", self.stdout, 1, float)
-
-    @performance_function("us")
-    def latency_small(self):
-        """Extract the latency from the largest size in the OSU test"""
-        return sn.extractsingle(r"^4\W+([0-9]+(?:.[0-9]+)?)", self.stdout, 1, float)
-
-    @sanity_function
-    def validate_job_run(self):
-        """Basic check that any output was produced"""
-        return sn.assert_found("OSU MPI Allreduce Latency Test ", self.stdout)
diff --git a/tests/utils/xthi/hetjob.py b/tests/utils/xthi/hetjob.py
index 2c81b2e2..77554afb 100644
--- a/tests/utils/xthi/hetjob.py
+++ b/tests/utils/xthi/hetjob.py
@@ -24,14 +24,14 @@ class SharedCommWorldTest(rfm.RunOnlyRegressionTest):
 
     maintainers = ["k.stratford@epcc.ed.ac.uk"]
     descr = "SLURM hetjob for xthi shared MPI_COM_WORLD"
-    valid_systems = ["archer2:compute"]
+    valid_systems = ["archer2:compute", "cirrus-ex:compute"]
     valid_prog_environs = ["*"]
     modules = ["xthi"]
 
     # Utter, utter kludge
     # 1 + 2 nodes; 8 + 2x4 MPI tasks
-    hetgroup0 = "--het-group=0 --nodes=1 --ntasks=8 --ntasks-per-node=8 xthi"
-    hetgroup1 = "--het-group=1 --nodes=2 --ntasks=8 --ntasks-per-node=4 xthi"
+    hetgroup0 = "--het-group=0 --nodes=1 --ntasks=8 --ntasks-per-node=8 xthi_mpi_mp"
+    hetgroup1 = "--het-group=1 --nodes=2 --ntasks=8 --ntasks-per-node=4 xthi_mpi_mp"
     executable = hetgroup0 + " : " + hetgroup1
     time_limit = "2m"
 
@@ -56,7 +56,7 @@ class SharedCommWorldWithOpenMPTest(rfm.RunOnlyRegressionTest):
     """
 
     descr = "SLURM hetjob for shared MPI_COM_WORLD with OpenMP"
-    valid_systems = ["archer2:compute"]
+    valid_systems = ["archer2:compute", "cirrus-ex:compute"]
     valid_prog_environs = ["*"]
     modules = ["xthi"]
 
@@ -64,8 +64,8 @@ class SharedCommWorldWithOpenMPTest(rfm.RunOnlyRegressionTest):
     shared_args = " --nodes=1 --ntasks=8 --tasks-per-node=8 --cpus-per-task=16"
     openmp0 = " --export=all,OMP_NUM_THREADS=16"
     openmp1 = " --export=all,OMP_NUM_THREADS=1"
-    hetgroup0 = "--het-group=0" + shared_args + openmp0 + " xthi"
-    hetgroup1 = "--het-group=1" + shared_args + openmp1 + " xthi"
+    hetgroup0 = "--het-group=0" + shared_args + openmp0 + " xthi_mpi_mp"
+    hetgroup1 = "--het-group=1" + shared_args + openmp1 + " xthi_mpi_mp"
     executable = hetgroup0 + " : " + hetgroup1
     time_limit = "2m"
 
diff --git a/tests/utils/xthi/xthi.py b/tests/utils/xthi/xthi.py
index c837714d..460913ab 100644
--- a/tests/utils/xthi/xthi.py
+++ b/tests/utils/xthi/xthi.py
@@ -13,11 +13,12 @@ class XthiCompilationTest(reframe.CompileOnlyRegressionTest):
 
     maintainers = ["k.straford@epcc.ed.ac.uk"]
     descr = "xthi compilation test"
-    valid_systems = ["archer2:login", "cirrus:login"]
+    valid_systems = ["archer2:login", "cirrus:login", "cirrus-ex:login"]
     valid_prog_environs = [
         "PrgEnv-cray",
         "PrgEnv-gnu",
         "PrgEnv-aocc",
+        "PrgEnv-intel",
         "gcc",
         "intel",
     ]