From 1fd638925fdc6caf240998d55546a45493ed8027 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 28 Jan 2026 14:36:01 +0100 Subject: [PATCH 01/68] Attempt to add cuda support to distributed ci pipeline --- ci/distributed.yml | 3 ++- ci/docker/base_mpi.Dockerfile | 36 ++++++++++++++++--------------- ci/docker/checkout_mpi.Dockerfile | 2 +- scripts/ci-mpi-wrapper.sh | 2 ++ 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 8b173b22b0..d8f8b9e920 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -80,7 +80,8 @@ build_distributed_cpu: parallel: matrix: - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] - BACKEND: [embedded, gtfn_cpu, dace_cpu] + # BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu] + BACKEND: [dace_cpu, dace_gpu] rules: - if: $COMPONENT == 'atmosphere/diffusion' variables: diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index 3fcdb21297..914b556136 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -4,23 +4,25 @@ ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \ - strace \ - build-essential \ - tar \ - wget \ - curl \ - libboost-dev \ - libnuma-dev \ - libopenmpi-dev \ - ca-certificates \ - libssl-dev \ - autoconf \ - automake \ - libtool \ - pkg-config \ - libreadline-dev \ - git && \ +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + autoconf \ + automake \ + build-essential \ + ca-certificates \ + curl \ + git \ + libboost-dev \ + libnuma-dev \ + libopenmpi-dev \ + libreadline-dev \ + libssl-dev \ + libtool \ + nvidia-cuda-dev \ + pkg-config \ + strace \ + tar \ + wget && \ rm -rf /var/lib/apt/lists/* # Install uv: https://docs.astral.sh/uv/guides/integration/docker diff --git a/ci/docker/checkout_mpi.Dockerfile b/ci/docker/checkout_mpi.Dockerfile index c229d6c374..62ea5daeae 100644 --- a/ci/docker/checkout_mpi.Dockerfile +++ b/ci/docker/checkout_mpi.Dockerfile @@ -8,4 +8,4 @@ ARG PYVERSION ARG VENV ENV UV_PROJECT_ENVIRONMENT=$VENV ENV MPI4PY_BUILD_BACKEND="scikit-build-core" -RUN uv sync --extra distributed --python=$PYVERSION +RUN uv sync --extra all --python=$PYVERSION diff --git a/scripts/ci-mpi-wrapper.sh b/scripts/ci-mpi-wrapper.sh index 900dd340ae..c0aa25d41f 100755 --- a/scripts/ci-mpi-wrapper.sh +++ b/scripts/ci-mpi-wrapper.sh @@ -17,6 +17,8 @@ else exit 1 fi +export CUDA_VISIBLE_DEVICES="${rank}" + log_file="${CI_PROJECT_DIR:+${CI_PROJECT_DIR}/}pytest-log-rank-${rank}.txt" if [[ "${rank}" -eq 0 ]]; then From cbb1891e84a85b316a550b81d497021313244190 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 28 Jan 2026 17:03:44 +0100 Subject: [PATCH 02/68] Add cuda12 extra --- ci/docker/checkout_mpi.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/checkout_mpi.Dockerfile b/ci/docker/checkout_mpi.Dockerfile index 62ea5daeae..4cbf1d32c0 100644 --- a/ci/docker/checkout_mpi.Dockerfile +++ b/ci/docker/checkout_mpi.Dockerfile @@ -8,4 +8,4 @@ ARG PYVERSION ARG VENV ENV UV_PROJECT_ENVIRONMENT=$VENV ENV MPI4PY_BUILD_BACKEND="scikit-build-core" -RUN uv sync --extra all --python=$PYVERSION +RUN uv sync --extra all --extra cuda12 --python=$PYVERSION From bbb151cbef4a93e65a6ccb451bc9f24e10dfd36c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 28 Jan 2026 18:02:02 +0100 Subject: [PATCH 03/68] Add nvidia-cuda-toolkit --- ci/docker/base_mpi.Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index 914b556136..92cb700e22 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -19,6 +19,7 @@ RUN apt-get update && \ libssl-dev \ libtool \ nvidia-cuda-dev \ + nvidia-cuda-toolkit \ pkg-config \ strace \ tar \ From b9be7fb076c60cc495ce92aa8719b2d2032269a3 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 29 Jan 2026 13:02:09 +0100 Subject: [PATCH 04/68] Revert "refactor: testing infrastructure (#1002)" This reverts commit e30c2f71e668952698fd93e3ce1a1c054029ea6c. --- .../model/common/utils/device_utils.py | 3 --- .../icon4py/model/testing/data_handling.py | 23 +++-------------- .../model/testing/fixtures/datatest.py | 25 +++++++++++++++++-- .../icon4py/model/testing/stencil_tests.py | 17 ++++++------- 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/model/common/src/icon4py/model/common/utils/device_utils.py b/model/common/src/icon4py/model/common/utils/device_utils.py index 360a53902a..cacfc8eb64 100644 --- a/model/common/src/icon4py/model/common/utils/device_utils.py +++ b/model/common/src/icon4py/model/common/utils/device_utils.py @@ -37,9 +37,6 @@ def sync(allocator: gtx_typing.FieldBufferAllocationUtil | None = None) -> None: Note: this is and ad-hoc interface, maybe the function should get the device to sync for. """ - # Type annotation already describes that only these types are allowed, but mypy coverage is not great. - # The explicit assert avoids critical mistakes in using this function. - assert allocator is None or gtx_allocators.is_field_allocation_tool(allocator) if allocator is not None and is_cupy_device(allocator): cp.cuda.runtime.deviceSynchronize() diff --git a/model/testing/src/icon4py/model/testing/data_handling.py b/model/testing/src/icon4py/model/testing/data_handling.py index 9624c64839..9ecf932335 100644 --- a/model/testing/src/icon4py/model/testing/data_handling.py +++ b/model/testing/src/icon4py/model/testing/data_handling.py @@ -6,13 +6,11 @@ # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause -import pathlib import tarfile +from pathlib import Path -from icon4py.model.testing import config, locking - -def download_and_extract(uri: str, dst: pathlib.Path, data_file: str = "downloaded.tar.gz") -> None: +def download_and_extract(uri: str, dst: Path, data_file: str = "downloaded.tar.gz") -> None: """ Download data archive from remote server. @@ -33,19 +31,4 @@ def download_and_extract(uri: str, dst: pathlib.Path, data_file: str = "download raise OSError(f"{data_file} needs to be a valid tar file") with tarfile.open(data_file, mode="r:*") as tf: tf.extractall(path=dst) - pathlib.Path(data_file).unlink(missing_ok=True) - - -def download_test_data(dst: pathlib.Path, uri: str) -> None: - if config.ENABLE_TESTDATA_DOWNLOAD: - # We create and lock the *parent* directory as we later check for existence of `dst`. - dst.parent.mkdir(parents=True, exist_ok=True) - with locking.lock(dst.parent): - if not dst.exists(): - download_and_extract(uri, dst) - else: - # If test data download is disabled, we check if the directory exists - # without locking. We assume the location is managed by the user - # and avoid locking shared directories (e.g. on CI). - if not dst.exists(): - raise RuntimeError(f"Test data {dst} does not exist, and downloading is disabled.") + Path(data_file).unlink(missing_ok=True) diff --git a/model/testing/src/icon4py/model/testing/fixtures/datatest.py b/model/testing/src/icon4py/model/testing/fixtures/datatest.py index c1d17332e9..28483172a1 100644 --- a/model/testing/src/icon4py/model/testing/fixtures/datatest.py +++ b/model/testing/src/icon4py/model/testing/fixtures/datatest.py @@ -17,7 +17,13 @@ from icon4py.model.common import model_backends, model_options from icon4py.model.common.constants import RayleighType from icon4py.model.common.grid import base as base_grid -from icon4py.model.testing import data_handling as data, datatest_utils as dt_utils, definitions +from icon4py.model.testing import ( + config, + data_handling as data, + datatest_utils as dt_utils, + definitions, + locking, +) if TYPE_CHECKING: @@ -119,7 +125,22 @@ def _download_ser_data( try: destination_path = dt_utils.get_datapath_for_experiment(_ranked_data_path, _experiment) uri = _experiment.partitioned_data[comm_size] - data.download_test_data(destination_path, uri) + + data_file = _ranked_data_path.joinpath(f"{_experiment.name}_mpitask{comm_size}.tar.gz").name + _ranked_data_path.mkdir(parents=True, exist_ok=True) + if config.ENABLE_TESTDATA_DOWNLOAD: + with locking.lock(_ranked_data_path): + # Note: if the lock would be created for `destination_path` it would always exist... + if not destination_path.exists(): + data.download_and_extract(uri, _ranked_data_path, data_file) + else: + # If test data download is disabled, we check if the directory exists + # without locking. We assume the location is managed by the user + # and avoid locking shared directories (e.g. on CI). + if not destination_path.exists(): + raise RuntimeError( + f"Serialization data {data_file} does not exist, and downloading is disabled." + ) except KeyError as err: raise RuntimeError( f"No data for communicator of size {comm_size} exists, use 1, 2 or 4" diff --git a/model/testing/src/icon4py/model/testing/stencil_tests.py b/model/testing/src/icon4py/model/testing/stencil_tests.py index ad1bf5e0ac..f83798f029 100644 --- a/model/testing/src/icon4py/model/testing/stencil_tests.py +++ b/model/testing/src/icon4py/model/testing/stencil_tests.py @@ -21,7 +21,6 @@ config as gtx_config, constructors, metrics as gtx_metrics, - named_collections as gtx_named_collections, typing as gtx_typing, ) @@ -35,15 +34,13 @@ def allocate_data( allocator: gtx_typing.FieldBufferAllocationUtil | None, - input_data: dict[ - str, Any - ], # `Field`s or collection of `Field`s are re-allocated, the rest is passed through -) -> dict[str, Any]: - def _allocate_field(f: gtx.Field) -> gtx.Field: - return constructors.as_field(domain=f.domain, data=f.ndarray, allocator=allocator) - + input_data: dict[str, gtx.Field | tuple[gtx.Field, ...]], +) -> dict[str, gtx.Field | tuple[gtx.Field, ...]]: + _allocate_field = constructors.as_field.partial(allocator=allocator) # type:ignore[attr-defined] # TODO(havogt): check why it doesn't understand the fluid_partial input_data = { - k: gtx_named_collections.tree_map_named_collection(_allocate_field)(v) + k: tuple(_allocate_field(domain=field.domain, data=field.ndarray) for field in v) + if isinstance(v, tuple) + else _allocate_field(domain=v.domain, data=v.ndarray) if not gtx.is_scalar_type(v) and k != "domain" else v for k, v in input_data.items() @@ -210,7 +207,7 @@ def _properly_allocated_input_data( self, input_data: dict[str, gtx.Field | tuple[gtx.Field, ...]], backend_like: model_backends.BackendLike, - ) -> dict[str, Any]: + ) -> dict[str, gtx.Field | tuple[gtx.Field, ...]]: # TODO(havogt): this is a workaround, # because in the `input_data` fixture provided by the user # it does not allocate for the correct device. From 731283a76200caadf5ca5c19ac68c26c79949ef5 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 29 Jan 2026 14:00:57 +0100 Subject: [PATCH 05/68] Use cxi hook in ci --- ci/distributed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index d8f8b9e920..00953956a1 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -38,7 +38,7 @@ build_distributed_baseimage_aarch64: DOCKERFILE: ci/docker/checkout_mpi.Dockerfile DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]' PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi - USE_MPI: NO + USE_MPI: YES SLURM_MPI_TYPE: pmix PMIX_MCA_psec: native PMIX_MCA_gds: "^shmem2" From ea2b3aa7bbfddde4b32bee52ac857b999fec5884 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 29 Jan 2026 14:24:53 +0100 Subject: [PATCH 06/68] Try mpich --- ci/distributed.yml | 7 ++++--- ci/docker/base_mpi.Dockerfile | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 00953956a1..5f58839466 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -39,9 +39,10 @@ build_distributed_baseimage_aarch64: DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]' PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi USE_MPI: YES - SLURM_MPI_TYPE: pmix - PMIX_MCA_psec: native - PMIX_MCA_gds: "^shmem2" + SLURM_MPI_TYPE: pmi2 + # SLURM_MPI_TYPE: pmix + # PMIX_MCA_psec: native + # PMIX_MCA_gds: "^shmem2" .build_distributed_cpu: extends: [.build_distributed_template] diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index 92cb700e22..d7c6b379c5 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -13,8 +13,8 @@ RUN apt-get update && \ curl \ git \ libboost-dev \ + libmpich-dev \ libnuma-dev \ - libopenmpi-dev \ libreadline-dev \ libssl-dev \ libtool \ From 8f04d362b80a7f07ade11bf9ebf48f951b5c9f5c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 29 Jan 2026 14:25:13 +0100 Subject: [PATCH 07/68] Reduce tests --- ci/distributed.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 5f58839466..7f75ebc63b 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -80,7 +80,8 @@ build_distributed_cpu: - scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT parallel: matrix: - - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] + # - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] + - COMPONENT: [common] # BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu] BACKEND: [dace_cpu, dace_gpu] rules: From 9f96b70edce78ffefdda8cd00b82ee7a886fcd43 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 29 Jan 2026 18:48:31 +0100 Subject: [PATCH 08/68] Try using manually built openmpi --- ci/distributed.yml | 9 ++--- ci/docker/base_mpi.Dockerfile | 75 ++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 7f75ebc63b..4d4d518b58 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -38,11 +38,10 @@ build_distributed_baseimage_aarch64: DOCKERFILE: ci/docker/checkout_mpi.Dockerfile DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]' PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi - USE_MPI: YES - SLURM_MPI_TYPE: pmi2 - # SLURM_MPI_TYPE: pmix - # PMIX_MCA_psec: native - # PMIX_MCA_gds: "^shmem2" + USE_MPI: NO + SLURM_MPI_TYPE: pmix + PMIX_MCA_psec: native + PMIX_MCA_gds: "^shmem2" .build_distributed_cpu: extends: [.build_distributed_template] diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index d7c6b379c5..bc18fd95fe 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -13,18 +13,91 @@ RUN apt-get update && \ curl \ git \ libboost-dev \ - libmpich-dev \ + libconfig-dev \ + libcurl4-openssl-dev \ + libfuse-dev \ + libjson-c-dev \ + libnl-3-dev \ libnuma-dev \ libreadline-dev \ + libsensors-dev \ libssl-dev \ libtool \ + libuv1-dev \ + libyaml-dev \ nvidia-cuda-dev \ nvidia-cuda-toolkit \ pkg-config \ + python3 \ strace \ tar \ wget && \ rm -rf /var/lib/apt/lists/* +# Install OpenMPI configured with libfabric, libcxi, and gdrcopy support for use on Alps. +ARG gdrcopy_version=2.5.1 +RUN set -eux; \ + git clone --depth 1 --branch "v${gdrcopy_version}" https://github.com/NVIDIA/gdrcopy.git; \ + cd gdrcopy; \ + make lib -j"$(nproc)" lib_install; \ + cd /; \ + rm -rf /gdrcopy; \ + ldconfig + +ARG cassini_headers_version=release/shs-13.0.0 +RUN set -eux; \ + git clone --depth 1 --branch "${cassini_headers_version}" https://github.com/HewlettPackard/shs-cassini-headers.git; \ + cd shs-cassini-headers; \ + cp -r include/* /usr/include/; \ + cp -r share/* /usr/share/; \ + rm -rf /shs-cassini-headers + +ARG cxi_driver_version=release/shs-13.0.0 +RUN set -eux; \ + git clone --depth 1 --branch "${cxi_driver_version}" https://github.com/HewlettPackard/shs-cxi-driver.git; \ + cd shs-cxi-driver; \ + cp -r include/* /usr/include/; \ + rm -rf /shs-cxi-driver + +ARG libcxi_version=release/shs-13.0.0 +RUN set -eux; \ + git clone --depth 1 --branch "${libcxi_version}" https://github.com/HewlettPackard/shs-libcxi.git; \ + cd shs-libcxi; \ + ./autogen.sh; \ + ./configure \ + --with-cuda; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf /shs-libcxi; \ + ldconfig + +ARG libfabric_version=v2.4.0 +RUN set -eux; \ + git clone --depth 1 --branch "${libfabric_version}" https://github.com/ofiwg/libfabric.git; \ + cd libfabric; \ + ./autogen.sh; \ + ./configure \ + --with-cuda \ + --enable-cuda-dlopen \ + --enable-gdrcopy-dlopen \ + --enable-cxi; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf /libfabric; \ + ldconfig + +ARG openmpi_version=5.0.9 +RUN set -eux; \ + curl -fsSL "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-${openmpi_version}.tar.gz" -o /tmp/ompi.tar.gz; \ + tar -C /tmp -xzf /tmp/ompi.tar.gz; \ + cd "/tmp/openmpi-${openmpi_version}"; \ + ./configure \ + --with-ofi \ + --with-cuda=/usr; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf "/tmp/openmpi-${openmpi_version}" /tmp/ompi.tar.gz; \ + ldconfig + # Install uv: https://docs.astral.sh/uv/guides/integration/docker COPY --from=ghcr.io/astral-sh/uv:0.9.24@sha256:816fdce3387ed2142e37d2e56e1b1b97ccc1ea87731ba199dc8a25c04e4997c5 /uv /uvx /bin/ From 9fce9b55efbda6dbe3ea10996bb54b96f8569d81 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 30 Jan 2026 13:01:12 +0100 Subject: [PATCH 09/68] Debugging --- ci/distributed.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/distributed.yml b/ci/distributed.yml index 4d4d518b58..9d545192cc 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -76,6 +76,8 @@ build_distributed_cpu: - source ${UV_PROJECT_ENVIRONMENT}/bin/activate - echo "running with $(python --version)" script: + - printenv + - echo USE_MPI=\${USE_MPI} - scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT parallel: matrix: From c6a767ed9a1a1ec893fc3943f5f4a2e686f6bee9 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 30 Jan 2026 15:22:32 +0100 Subject: [PATCH 10/68] Remove debug prints --- ci/distributed.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 9d545192cc..4d4d518b58 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -76,8 +76,6 @@ build_distributed_cpu: - source ${UV_PROJECT_ENVIRONMENT}/bin/activate - echo "running with $(python --version)" script: - - printenv - - echo USE_MPI=\${USE_MPI} - scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT parallel: matrix: From adb1ee6fda08bb5cda894c49634eb1a63656679a Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 30 Jan 2026 15:28:18 +0100 Subject: [PATCH 11/68] Unrevert test download changes --- .../icon4py/model/common/utils/device_utils.py | 3 +++ .../src/icon4py/model/testing/data_handling.py | 5 +++-- .../icon4py/model/testing/fixtures/datatest.py | 8 +------- .../src/icon4py/model/testing/stencil_tests.py | 17 ++++++++++------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/model/common/src/icon4py/model/common/utils/device_utils.py b/model/common/src/icon4py/model/common/utils/device_utils.py index cacfc8eb64..360a53902a 100644 --- a/model/common/src/icon4py/model/common/utils/device_utils.py +++ b/model/common/src/icon4py/model/common/utils/device_utils.py @@ -37,6 +37,9 @@ def sync(allocator: gtx_typing.FieldBufferAllocationUtil | None = None) -> None: Note: this is and ad-hoc interface, maybe the function should get the device to sync for. """ + # Type annotation already describes that only these types are allowed, but mypy coverage is not great. + # The explicit assert avoids critical mistakes in using this function. + assert allocator is None or gtx_allocators.is_field_allocation_tool(allocator) if allocator is not None and is_cupy_device(allocator): cp.cuda.runtime.deviceSynchronize() diff --git a/model/testing/src/icon4py/model/testing/data_handling.py b/model/testing/src/icon4py/model/testing/data_handling.py index c490c8981b..95bc8b8369 100644 --- a/model/testing/src/icon4py/model/testing/data_handling.py +++ b/model/testing/src/icon4py/model/testing/data_handling.py @@ -9,10 +9,11 @@ import os import pathlib import tarfile -from pathlib import Path +from icon4py.model.testing import config, locking -def download_and_extract(uri: str, dst: Path, data_file: str = "downloaded.tar.gz") -> None: + +def download_and_extract(uri: str, dst: pathlib.Path, data_file: str = "downloaded.tar.gz") -> None: """ Download data archive from remote server. diff --git a/model/testing/src/icon4py/model/testing/fixtures/datatest.py b/model/testing/src/icon4py/model/testing/fixtures/datatest.py index 057235b1eb..0727c962ed 100644 --- a/model/testing/src/icon4py/model/testing/fixtures/datatest.py +++ b/model/testing/src/icon4py/model/testing/fixtures/datatest.py @@ -17,13 +17,7 @@ from icon4py.model.common import model_backends, model_options from icon4py.model.common.constants import RayleighType from icon4py.model.common.grid import base as base_grid -from icon4py.model.testing import ( - config, - data_handling as data, - datatest_utils as dt_utils, - definitions, - locking, -) +from icon4py.model.testing import data_handling as data, datatest_utils as dt_utils, definitions if TYPE_CHECKING: diff --git a/model/testing/src/icon4py/model/testing/stencil_tests.py b/model/testing/src/icon4py/model/testing/stencil_tests.py index f83798f029..ad1bf5e0ac 100644 --- a/model/testing/src/icon4py/model/testing/stencil_tests.py +++ b/model/testing/src/icon4py/model/testing/stencil_tests.py @@ -21,6 +21,7 @@ config as gtx_config, constructors, metrics as gtx_metrics, + named_collections as gtx_named_collections, typing as gtx_typing, ) @@ -34,13 +35,15 @@ def allocate_data( allocator: gtx_typing.FieldBufferAllocationUtil | None, - input_data: dict[str, gtx.Field | tuple[gtx.Field, ...]], -) -> dict[str, gtx.Field | tuple[gtx.Field, ...]]: - _allocate_field = constructors.as_field.partial(allocator=allocator) # type:ignore[attr-defined] # TODO(havogt): check why it doesn't understand the fluid_partial + input_data: dict[ + str, Any + ], # `Field`s or collection of `Field`s are re-allocated, the rest is passed through +) -> dict[str, Any]: + def _allocate_field(f: gtx.Field) -> gtx.Field: + return constructors.as_field(domain=f.domain, data=f.ndarray, allocator=allocator) + input_data = { - k: tuple(_allocate_field(domain=field.domain, data=field.ndarray) for field in v) - if isinstance(v, tuple) - else _allocate_field(domain=v.domain, data=v.ndarray) + k: gtx_named_collections.tree_map_named_collection(_allocate_field)(v) if not gtx.is_scalar_type(v) and k != "domain" else v for k, v in input_data.items() @@ -207,7 +210,7 @@ def _properly_allocated_input_data( self, input_data: dict[str, gtx.Field | tuple[gtx.Field, ...]], backend_like: model_backends.BackendLike, - ) -> dict[str, gtx.Field | tuple[gtx.Field, ...]]: + ) -> dict[str, Any]: # TODO(havogt): this is a workaround, # because in the `input_data` fixture provided by the user # it does not allocate for the correct device. From b0321e77e07460784e93008beaeff3bc4fcffc64 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 30 Jan 2026 15:43:10 +0100 Subject: [PATCH 12/68] Numpy/cupy issues Make revert_repeated_index_to_invalid numpy-only as it's not usefully vectorized --- model/common/src/icon4py/model/common/grid/utils.py | 10 +++++----- model/testing/src/icon4py/model/testing/serialbox.py | 7 ++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/model/common/src/icon4py/model/common/grid/utils.py b/model/common/src/icon4py/model/common/grid/utils.py index 39b48c9dd5..4af7b0a6ba 100644 --- a/model/common/src/icon4py/model/common/grid/utils.py +++ b/model/common/src/icon4py/model/common/grid/utils.py @@ -12,14 +12,14 @@ from icon4py.model.common.grid import gridfile -def revert_repeated_index_to_invalid(offset: np.ndarray, array_ns: ModuleType): +def revert_repeated_index_to_invalid(offset: np.ndarray): num_elements = offset.shape[0] for i in range(num_elements): # convert repeated indices back into -1 - for val in array_ns.flip(offset[i, :]): - if array_ns.count_nonzero(val == offset[i, :]) > 1: - unique_values, counts = array_ns.unique(offset[i, :], return_counts=True) + for val in np.flip(offset[i, :]): + if np.count_nonzero(val == offset[i, :]) > 1: + unique_values, counts = np.unique(offset[i, :], return_counts=True) rep_values = unique_values[counts > 1] - rep_indices = array_ns.where(array_ns.isin(offset[i, :], rep_values))[0] + rep_indices = np.where(np.isin(offset[i, :], rep_values))[0] offset[i, rep_indices[1:]] = gridfile.GridFile.INVALID_INDEX return offset diff --git a/model/testing/src/icon4py/model/testing/serialbox.py b/model/testing/src/icon4py/model/testing/serialbox.py index be4edf41dd..05a3fc53fe 100644 --- a/model/testing/src/icon4py/model/testing/serialbox.py +++ b/model/testing/src/icon4py/model/testing/serialbox.py @@ -72,7 +72,7 @@ def wrapper(self, *args, **kwargs): # as a workaround for the lack of support for optional fields in gt4py. shp = (1,) * len(dims) return gtx.as_field( - dims, np.zeros(shp, dtype=dtype), allocator=self.backend + dims, self.xp.zeros(shp, dtype=dtype), allocator=self.backend ) else: return None @@ -503,10 +503,7 @@ def construct_icon_grid( def potentially_revert_icon_index_transformation(ar): return ar else: - potentially_revert_icon_index_transformation = functools.partial( - grid_utils.revert_repeated_index_to_invalid, - array_ns=data_alloc.import_array_ns(backend), - ) + potentially_revert_icon_index_transformation = grid_utils.revert_repeated_index_to_invalid c2e2c = self.c2e2c() e2c2e = potentially_revert_icon_index_transformation(self.e2c2e()) From c62979c718f488c719a59bb997ea56584adbd684 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 30 Jan 2026 16:32:43 +0100 Subject: [PATCH 13/68] Enable shm, lnx, xpmem support in libfabric --- ci/docker/base_mpi.Dockerfile | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index bc18fd95fe..6d00d12db9 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -71,6 +71,19 @@ RUN set -eux; \ rm -rf /shs-libcxi; \ ldconfig +ARG xpmem_version=0d0bad4e1d07b38d53ecc8f20786bb1328c446da +RUN set -eux; \ + git clone https://github.com/hpc/xpmem.git; \ + cd xpmem; \ + git checkout "${xpmem_version}"; \ + ./autogen.sh; \ + ./configure --disable-kernel-module; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf /xpmem; \ + ldconfig + +# NOTE: xpmem is not found correctly without setting the prefix in --enable-xpmem ARG libfabric_version=v2.4.0 RUN set -eux; \ git clone --depth 1 --branch "${libfabric_version}" https://github.com/ofiwg/libfabric.git; \ @@ -80,7 +93,11 @@ RUN set -eux; \ --with-cuda \ --enable-cuda-dlopen \ --enable-gdrcopy-dlopen \ - --enable-cxi; \ + --enable-xpmem=/usr \ + --enable-tcp \ + --enable-cxi \ + --enable-lnx \ + --enable-shm; \ make -j"$(nproc)" install; \ cd /; \ rm -rf /libfabric; \ From b4071d03f696503dbe4e158c7308372bca3a3362 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 30 Jan 2026 16:45:50 +0100 Subject: [PATCH 14/68] Linting --- model/common/src/icon4py/model/common/grid/utils.py | 1 - model/testing/src/icon4py/model/testing/serialbox.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/model/common/src/icon4py/model/common/grid/utils.py b/model/common/src/icon4py/model/common/grid/utils.py index 4af7b0a6ba..dbb3d69449 100644 --- a/model/common/src/icon4py/model/common/grid/utils.py +++ b/model/common/src/icon4py/model/common/grid/utils.py @@ -5,7 +5,6 @@ # # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause -from types import ModuleType import numpy as np diff --git a/model/testing/src/icon4py/model/testing/serialbox.py b/model/testing/src/icon4py/model/testing/serialbox.py index 05a3fc53fe..3bb52a9ed1 100644 --- a/model/testing/src/icon4py/model/testing/serialbox.py +++ b/model/testing/src/icon4py/model/testing/serialbox.py @@ -503,7 +503,9 @@ def construct_icon_grid( def potentially_revert_icon_index_transformation(ar): return ar else: - potentially_revert_icon_index_transformation = grid_utils.revert_repeated_index_to_invalid + potentially_revert_icon_index_transformation = ( + grid_utils.revert_repeated_index_to_invalid + ) c2e2c = self.c2e2c() e2c2e = potentially_revert_icon_index_transformation(self.e2c2e()) From 6eb3d8d4379b10a9efedeadc84888b54c8e48852 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 30 Jan 2026 19:47:59 +0100 Subject: [PATCH 15/68] Enable GPU support for GHEX --- ci/docker/checkout_mpi.Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/docker/checkout_mpi.Dockerfile b/ci/docker/checkout_mpi.Dockerfile index 4cbf1d32c0..01e26702b4 100644 --- a/ci/docker/checkout_mpi.Dockerfile +++ b/ci/docker/checkout_mpi.Dockerfile @@ -7,5 +7,9 @@ WORKDIR /icon4py ARG PYVERSION ARG VENV ENV UV_PROJECT_ENVIRONMENT=$VENV -ENV MPI4PY_BUILD_BACKEND="scikit-build-core" +ENV MPI4PY_BUILD_BACKEND=scikit-build-core +ENV GHEX_USE_GPU=ON +ENV GHEX_GPU_TYPE=NVIDIA +ENV GHEX_GPU_ARCH=90 +ENV GHEX_TRANSPORT_BACKEND=MPI RUN uv sync --extra all --extra cuda12 --python=$PYVERSION From 28b1b1bbdae5a5623f941b238df8106c1165cab6 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Sun, 1 Feb 2026 21:13:08 +0100 Subject: [PATCH 16/68] Set appropriate gcc for cuda --- ci/docker/base_mpi.Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index 6d00d12db9..eb46a926a9 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -27,6 +27,7 @@ RUN apt-get update && \ libyaml-dev \ nvidia-cuda-dev \ nvidia-cuda-toolkit \ + nvidia-cuda-toolkit-gcc \ pkg-config \ python3 \ strace \ @@ -34,6 +35,10 @@ RUN apt-get update && \ wget && \ rm -rf /var/lib/apt/lists/* +ENV CC=/usr/bin/cuda-gcc +ENV CXX=/usr/bin/cuda-g++ +ENV CUDAHOSTCXX=/usr/bin/cuda-g++ + # Install OpenMPI configured with libfabric, libcxi, and gdrcopy support for use on Alps. ARG gdrcopy_version=2.5.1 RUN set -eux; \ From 73a5b5bb1bf26a2d056b66b8db73eaa1a0538441 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Sun, 1 Feb 2026 21:13:20 +0100 Subject: [PATCH 17/68] Explicitly set OpenMPI settings --- ci/distributed.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 4d4d518b58..3c978a5f69 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -38,10 +38,6 @@ build_distributed_baseimage_aarch64: DOCKERFILE: ci/docker/checkout_mpi.Dockerfile DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]' PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi - USE_MPI: NO - SLURM_MPI_TYPE: pmix - PMIX_MCA_psec: native - PMIX_MCA_gds: "^shmem2" .build_distributed_cpu: extends: [.build_distributed_template] @@ -66,6 +62,16 @@ build_distributed_cpu: ICON4PY_ENABLE_GRID_DOWNLOAD: false ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false CSCS_ADDITIONAL_MOUNTS: '["/capstor/store/cscs/userlab/d126/icon4py/ci/testdata_003:$TEST_DATA_PATH"]' + # Do not use libfabric from the host system. Libfabric with slingshot + # support is built into the container image. + USE_MPI: NO + # Use libfabric slingshot (cxi) provider and recommended settings from + # https://docs.cscs.ch/software/communication/openmpi. + SLURM_MPI_TYPE: pmix + PMIX_MCA_psec: native + FI_PROVIDER: cxi + OMPI_MCA_pml: cm + OMPI_MCA_mtl: ofi .test_distributed_aarch64: stage: test From d8e90e4fe01750202ea403fc88d3d44bdb282513 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 3 Feb 2026 11:52:51 +0100 Subject: [PATCH 18/68] Don't dlopen cuda and gdrcopy --- ci/docker/base_mpi.Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index eb46a926a9..383ffe04c9 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -96,8 +96,6 @@ RUN set -eux; \ ./autogen.sh; \ ./configure \ --with-cuda \ - --enable-cuda-dlopen \ - --enable-gdrcopy-dlopen \ --enable-xpmem=/usr \ --enable-tcp \ --enable-cxi \ From 67cfdb51d077ddbeb209e568496f6349eda4eceb Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 3 Feb 2026 13:29:24 +0100 Subject: [PATCH 19/68] Update comments and clean up options --- ci/docker/base_mpi.Dockerfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index 383ffe04c9..f849c4d626 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -39,7 +39,9 @@ ENV CC=/usr/bin/cuda-gcc ENV CXX=/usr/bin/cuda-g++ ENV CUDAHOSTCXX=/usr/bin/cuda-g++ -# Install OpenMPI configured with libfabric, libcxi, and gdrcopy support for use on Alps. +# Install OpenMPI configured with libfabric, libcxi, and gdrcopy support for use +# on Alps. This is based on examples in +# https://github.com/eth-cscs/cray-network-stack. ARG gdrcopy_version=2.5.1 RUN set -eux; \ git clone --depth 1 --branch "v${gdrcopy_version}" https://github.com/NVIDIA/gdrcopy.git; \ @@ -88,7 +90,8 @@ RUN set -eux; \ rm -rf /xpmem; \ ldconfig -# NOTE: xpmem is not found correctly without setting the prefix in --enable-xpmem +# NOTE: xpmem is not found correctly without setting the prefix explicitly in +# --enable-xpmem ARG libfabric_version=v2.4.0 RUN set -eux; \ git clone --depth 1 --branch "${libfabric_version}" https://github.com/ofiwg/libfabric.git; \ @@ -98,9 +101,7 @@ RUN set -eux; \ --with-cuda \ --enable-xpmem=/usr \ --enable-tcp \ - --enable-cxi \ - --enable-lnx \ - --enable-shm; \ + --enable-cxi; \ make -j"$(nproc)" install; \ cd /; \ rm -rf /libfabric; \ From c81af9ebdb020011204538f8b1008d66f9e8d4f4 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 3 Feb 2026 13:29:38 +0100 Subject: [PATCH 20/68] Try ubuntu lts release for distributed ci --- ci/docker/base_mpi.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index f849c4d626..c48241855e 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:25.04 +FROM ubuntu:24.04 ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 From 790612a0ee7b1e5bd390169e7b15a3c50913d39b Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 3 Feb 2026 13:31:51 +0100 Subject: [PATCH 21/68] Set gpu binding through SLURM_GPUS_PER_TASK --- ci/distributed.yml | 1 + scripts/ci-mpi-wrapper.sh | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 3c978a5f69..c0d835e2fe 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -58,6 +58,7 @@ build_distributed_cpu: SLURM_JOB_NUM_NODES: 1 SLURM_CPU_BIND: 'verbose' SLURM_NTASKS: 4 + SLURM_GPUS_PER_TASK: 1 TEST_DATA_PATH: "/icon4py/testdata" ICON4PY_ENABLE_GRID_DOWNLOAD: false ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false diff --git a/scripts/ci-mpi-wrapper.sh b/scripts/ci-mpi-wrapper.sh index c0aa25d41f..900dd340ae 100755 --- a/scripts/ci-mpi-wrapper.sh +++ b/scripts/ci-mpi-wrapper.sh @@ -17,8 +17,6 @@ else exit 1 fi -export CUDA_VISIBLE_DEVICES="${rank}" - log_file="${CI_PROJECT_DIR:+${CI_PROJECT_DIR}/}pytest-log-rank-${rank}.txt" if [[ "${rank}" -eq 0 ]]; then From 64482e8fa1eefb5200ac3fbe78406d00e07093c9 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 3 Feb 2026 13:32:32 +0100 Subject: [PATCH 22/68] Enable all tests again --- ci/distributed.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index c0d835e2fe..d8e2a1068c 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -86,10 +86,8 @@ build_distributed_cpu: - scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT parallel: matrix: - # - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] - - COMPONENT: [common] - # BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu] - BACKEND: [dace_cpu, dace_gpu] + - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] + BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu] rules: - if: $COMPONENT == 'atmosphere/diffusion' variables: From b3eef3a6c78072d59df6009425a39fa7a6eaf24d Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 3 Feb 2026 13:33:25 +0100 Subject: [PATCH 23/68] Clean up names in distributed.yml --- ci/distributed.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index d8e2a1068c..f8600e85b1 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -39,21 +39,21 @@ build_distributed_baseimage_aarch64: DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]' PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi -.build_distributed_cpu: +.build_distributed: extends: [.build_distributed_template] variables: UV_PROJECT_ENVIRONMENT: venv_dist -build_distributed_cpu: +build_distributed: stage: image - extends: [.container-builder-cscs-gh200, .build_distributed_cpu] + extends: [.container-builder-cscs-gh200, .build_distributed] needs: [build_distributed_baseimage_aarch64] .test_template_distributed: timeout: 8h image: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi - extends: [.container-runner-santis-gh200, .build_distributed_cpu] - needs: [build_distributed_cpu] + extends: [.container-runner-santis-gh200, .build_distributed] + needs: [build_distributed] variables: SLURM_JOB_NUM_NODES: 1 SLURM_CPU_BIND: 'verbose' From d6f71d60fb49e6d92fe1a185aaf6a061a654bcc1 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 3 Feb 2026 16:02:30 +0100 Subject: [PATCH 24/68] Update base image to ubuntu 25.10 --- ci/docker/base_mpi.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index c48241855e..a600b4ff1c 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:24.04 +FROM ubuntu:25.10 ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 From 518bbdee8c8d92e884267b4fd5a157eaaac29b2e Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 4 Feb 2026 14:19:58 +0100 Subject: [PATCH 25/68] Mark distributed compute_geofac_div test embedded only, like single-rank test --- .../common/decomposition/mpi_tests/test_mpi_decomposition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py index 5bf956428d..d8f6f2aa88 100644 --- a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py +++ b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py @@ -280,6 +280,7 @@ def test_exchange_on_dummy_data( @pytest.mark.mpi @pytest.mark.datatest +@pytest.mark.embedded_only @pytest.mark.parametrize("processor_props", [False], indirect=True) def test_halo_exchange_for_sparse_field( interpolation_savepoint: serialbox.InterpolationSavepoint, From c1eed7f8cc6a57fcc7c96ce55c511fa1f4ed08eb Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 4 Feb 2026 15:19:00 +0100 Subject: [PATCH 26/68] Use philip's async-mpi branch (fixes gpu buffer stride computation) --- pyproject.toml | 2 +- uv.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e349356eb7..df2c6e3d98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -361,7 +361,7 @@ url = 'https://gridtools.github.io/pypi/' [tool.uv.sources] dace = {index = "gridtools"} -ghex = {git = "https://github.com/msimberg/GHEX.git", branch = "async-mpi"} +ghex = {git = "https://github.com/philip-paul-mueller/GHEX.git", branch = "phimuell__async-mpi-2"} # gt4py = {git = "https://github.com/GridTools/gt4py", branch = "main"} # gt4py = {index = "test.pypi"} icon4py-atmosphere-advection = {workspace = true} diff --git a/uv.lock b/uv.lock index f5641ba1e4..aca8ec23cc 100644 --- a/uv.lock +++ b/uv.lock @@ -1362,7 +1362,7 @@ wheels = [ [[package]] name = "ghex" version = "0.4.1" -source = { git = "https://github.com/msimberg/GHEX.git?branch=async-mpi#6d896166994cedbcfc50da1873239a5edb212e3f" } +source = { git = "https://github.com/philip-paul-mueller/GHEX.git?branch=phimuell__async-mpi-2#80c0650fdae40bdd40e0435e5687267bada4cdd2" } dependencies = [ { name = "mpi4py" }, { name = "numpy" }, @@ -1887,7 +1887,7 @@ requires-dist = [ { name = "cupy-cuda12x", marker = "extra == 'cuda12'", specifier = ">=13.0" }, { name = "dace", specifier = "==43!2026.1.21", index = "https://gridtools.github.io/pypi/" }, { name = "datashader", marker = "extra == 'io'", specifier = ">=0.16.1" }, - { name = "ghex", marker = "extra == 'distributed'", git = "https://github.com/msimberg/GHEX.git?branch=async-mpi" }, + { name = "ghex", marker = "extra == 'distributed'", git = "https://github.com/philip-paul-mueller/GHEX.git?branch=phimuell__async-mpi-2" }, { name = "gt4py", specifier = "==1.1.3" }, { name = "gt4py", extras = ["cuda11"], marker = "extra == 'cuda11'" }, { name = "gt4py", extras = ["cuda12"], marker = "extra == 'cuda12'" }, From d08b60cf14d69dd4c3ec16e546621e64ea0d1ba9 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 4 Feb 2026 16:44:34 +0100 Subject: [PATCH 27/68] Increase time limit for distributed dace tests --- ci/distributed.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 3a262d7de0..1828a3f4ea 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -92,9 +92,9 @@ build_distributed: - if: $COMPONENT == 'atmosphere/diffusion' variables: SLURM_TIMELIMIT: '00:05:00' - - if: $COMPONENT == 'atmosphere/dycore' && $BACKEND == 'dace_cpu' + - if: $COMPONENT == 'atmosphere/dycore' && ($BACKEND == 'dace_cpu' || $BACKEND == 'dace_gpu') variables: - SLURM_TIMELIMIT: '00:20:00' + SLURM_TIMELIMIT: '00:30:00' - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' From 148850c271ccc19c6c8b333b0190c247e19cb2bd Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 4 Feb 2026 16:56:17 +0100 Subject: [PATCH 28/68] Increase time limit for distributed dace_gpu common tests --- ci/distributed.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/distributed.yml b/ci/distributed.yml index 1828a3f4ea..8c22a08611 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -95,6 +95,9 @@ build_distributed: - if: $COMPONENT == 'atmosphere/dycore' && ($BACKEND == 'dace_cpu' || $BACKEND == 'dace_gpu') variables: SLURM_TIMELIMIT: '00:30:00' + - if: $COMPONENT == 'common' && $BACKEND == 'dace_gpu' + variables: + SLURM_TIMELIMIT: '00:45:00' - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' From 0c727f58ff443cf7d049a36bb5d383d0603ec00e Mon Sep 17 00:00:00 2001 From: Jacopo Canton Date: Thu, 5 Feb 2026 12:52:25 +0100 Subject: [PATCH 29/68] sorry2 --- ci/distributed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 192838a0f5..4b4038d047 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -59,7 +59,7 @@ build_distributed: SLURM_CPU_BIND: 'verbose' SLURM_NTASKS: 4 SLURM_GPUS_PER_TASK: 1 - TEST_DATA_PATH: "/icon4py/testdata" + ICON4PY_TEST_DATA_PATH: "/icon4py/testdata" ICON4PY_ENABLE_GRID_DOWNLOAD: false ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false CSCS_ADDITIONAL_MOUNTS: '["/capstor/store/cscs/userlab/cwci02/icon4py/ci/testdata:$ICON4PY_TEST_DATA_PATH"]' From ce21e8f73a27ef1af726230bf7a89f6493f84f25 Mon Sep 17 00:00:00 2001 From: Nicoletta Farabullini <41536517+nfarabullini@users.noreply.github.com> Date: Wed, 4 Mar 2026 16:11:54 +0100 Subject: [PATCH 30/68] modified np strict references with broader array_ns --- .../common/interpolation/interpolation_fields.py | 15 ++++++++++----- .../src/icon4py/model/common/math/projection.py | 14 ++++++++++---- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py index c04aa16f20..c1c0c2b954 100644 --- a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py +++ b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py @@ -1181,11 +1181,12 @@ def compute_lsq_weights_c( lsq_weights_c_jc: data_alloc.NDArray, lsq_dim_stencil: int, lsq_wgt_exp: int, + array_ns: ModuleType = np, ) -> data_alloc.NDArray: for js in range(lsq_dim_stencil): - z_norm = np.sqrt(np.dot(z_dist_g[js, :], z_dist_g[js, :])) + z_norm = array_ns.sqrt(array_ns.dot(z_dist_g[js, :], z_dist_g[js, :])) lsq_weights_c_jc[js] = 1.0 / (z_norm**lsq_wgt_exp) - return lsq_weights_c_jc / np.max(lsq_weights_c_jc) + return lsq_weights_c_jc / array_ns.max(lsq_weights_c_jc) def compute_z_lsq_mat_c( @@ -1234,9 +1235,13 @@ def compute_lsq_coeffs( match base_grid.GeometryType(geometry_type): case base_grid.GeometryType.ICOSAHEDRON: for js in range(lsq_dim_stencil): - z_dist_g[:, js, :] = np.asarray( + z_dist_g[:, js, :] = array_ns.asarray( gnomonic_proj( - cell_lon[:], cell_lat[:], cell_lon[c2e2c[:, js]], cell_lat[c2e2c[:, js]] + cell_lon[:], + cell_lat[:], + cell_lon[c2e2c[:, js]], + cell_lat[c2e2c[:, js]], + array_ns, ) ).T @@ -1265,7 +1270,7 @@ def compute_lsq_coeffs( for jc in range(start_idx, min_rlcell_int): lsq_weights_c[jc, :] = compute_lsq_weights_c( - z_dist_g[jc, :, :], lsq_weights_c[jc, :], lsq_dim_stencil, lsq_wgt_exp + z_dist_g[jc, :, :], lsq_weights_c[jc, :], lsq_dim_stencil, lsq_wgt_exp, array_ns ) z_lsq_mat_c[jc, js, :lsq_dim_unk] = compute_z_lsq_mat_c( cell_owner_mask, diff --git a/model/common/src/icon4py/model/common/math/projection.py b/model/common/src/icon4py/model/common/math/projection.py index fcec8fbc5f..a696cbbadb 100644 --- a/model/common/src/icon4py/model/common/math/projection.py +++ b/model/common/src/icon4py/model/common/math/projection.py @@ -5,7 +5,7 @@ # # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause - +from types import ModuleType import numpy as np @@ -17,6 +17,7 @@ def gnomonic_proj( lat_c: data_alloc.NDArray, lon: data_alloc.NDArray, lat: data_alloc.NDArray, + array_ns: ModuleType = np, ) -> tuple[data_alloc.NDArray, data_alloc.NDArray]: """ Compute gnomonic projection. @@ -38,11 +39,16 @@ def gnomonic_proj( TODO: replace this with a suitable library call """ - cosc = np.sin(lat_c) * np.sin(lat) + np.cos(lat_c) * np.cos(lat) * np.cos(lon - lon_c) + cosc = array_ns.sin(lat_c) * array_ns.sin(lat) + array_ns.cos(lat_c) * array_ns.cos( + lat + ) * array_ns.cos(lon - lon_c) zk = 1.0 / cosc - x = zk * np.cos(lat) * np.sin(lon - lon_c) - y = zk * (np.cos(lat_c) * np.sin(lat) - np.sin(lat_c) * np.cos(lat) * np.cos(lon - lon_c)) + x = zk * array_ns.cos(lat) * array_ns.sin(lon - lon_c) + y = zk * ( + array_ns.cos(lat_c) * array_ns.sin(lat) + - array_ns.sin(lat_c) * array_ns.cos(lat) * array_ns.cos(lon - lon_c) + ) return x, y From 878db70bcc15993c41e62f57aadadc3a0097e1bd Mon Sep 17 00:00:00 2001 From: Nicoletta Farabullini <41536517+nfarabullini@users.noreply.github.com> Date: Thu, 5 Mar 2026 15:06:45 +0100 Subject: [PATCH 31/68] Update interpolation_fields.py --- .../model/common/interpolation/interpolation_fields.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py index c1c0c2b954..e2eb4803d3 100644 --- a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py +++ b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py @@ -13,7 +13,6 @@ from typing import Final import numpy as np -import scipy from gt4py import next as gtx from gt4py.next import where @@ -1163,11 +1162,12 @@ def compute_lsq_pseudoinv( min_rlcell_int: int, lsq_dim_unk: int, lsq_dim_c: int, + array_ns: ModuleType = np, ) -> data_alloc.NDArray: for jjb in range(lsq_dim_c): for jjk in range(lsq_dim_unk): for jc in range(start_idx, min_rlcell_int): - u, s, v_t, _ = scipy.linalg.lapack.dgesdd(z_lsq_mat_c[jc, :, :]) + u, s, v_t, _ = array_ns.linalg.svd(z_lsq_mat_c[jc, :, :]) if cell_owner_mask[jc]: lsq_pseudoinv[jc, :lsq_dim_unk, jjb] = ( lsq_pseudoinv[jc, :lsq_dim_unk, jjb] @@ -1294,6 +1294,7 @@ def compute_lsq_coeffs( min_rlcell_int, lsq_dim_unk, lsq_dim_c, + array_ns ) if exchange != decomposition.single_node_default: exchange(lsq_pseudoinv[:, 0, :]) From c449030065bc88edc414e5060c1f7a9916bc717e Mon Sep 17 00:00:00 2001 From: Nicoletta Farabullini <41536517+nfarabullini@users.noreply.github.com> Date: Thu, 5 Mar 2026 15:08:53 +0100 Subject: [PATCH 32/68] ran pre-commit --- .../icon4py/model/common/interpolation/interpolation_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py index e2eb4803d3..b74df27ac7 100644 --- a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py +++ b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py @@ -1294,7 +1294,7 @@ def compute_lsq_coeffs( min_rlcell_int, lsq_dim_unk, lsq_dim_c, - array_ns + array_ns, ) if exchange != decomposition.single_node_default: exchange(lsq_pseudoinv[:, 0, :]) From 9460369d6e2d3facb86e3fd31680202197686b55 Mon Sep 17 00:00:00 2001 From: Nicoletta Farabullini <41536517+nfarabullini@users.noreply.github.com> Date: Thu, 5 Mar 2026 15:34:08 +0100 Subject: [PATCH 33/68] removed additional but unused return val --- .../icon4py/model/common/interpolation/interpolation_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py index b74df27ac7..4d0d6c7f74 100644 --- a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py +++ b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py @@ -1167,7 +1167,7 @@ def compute_lsq_pseudoinv( for jjb in range(lsq_dim_c): for jjk in range(lsq_dim_unk): for jc in range(start_idx, min_rlcell_int): - u, s, v_t, _ = array_ns.linalg.svd(z_lsq_mat_c[jc, :, :]) + u, s, v_t = array_ns.linalg.svd(z_lsq_mat_c[jc, :, :]) if cell_owner_mask[jc]: lsq_pseudoinv[jc, :lsq_dim_unk, jjb] = ( lsq_pseudoinv[jc, :lsq_dim_unk, jjb] From c3606ae66b1da39da7c9ee87c7c1106b553c4692 Mon Sep 17 00:00:00 2001 From: Nicoletta Farabullini <41536517+nfarabullini@users.noreply.github.com> Date: Thu, 5 Mar 2026 16:21:57 +0100 Subject: [PATCH 34/68] Update interpolation_fields.py --- .../model/common/interpolation/interpolation_fields.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py index 4d0d6c7f74..8a8d92318f 100644 --- a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py +++ b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py @@ -1258,14 +1258,14 @@ def compute_lsq_coeffs( cc_cv = (cell_center_x[jc], cell_center_y[jc]) for js in range(lsq_dim_stencil): - cc_cell[js, :] = diff_on_edges_torus_numpy( + cc_cell[js, :] = array_ns.asarray(diff_on_edges_torus_numpy( cell_center_x[jc], cell_center_y[jc], cell_center_x[ilc_s][js], cell_center_y[ilc_s][js], domain_length, domain_height, - ) + )) z_dist_g[jc, :, :] = cc_cell - cc_cv for jc in range(start_idx, min_rlcell_int): From 81375cac88716783b1b3dca309e272ace34873f5 Mon Sep 17 00:00:00 2001 From: Nicoletta Farabullini <41536517+nfarabullini@users.noreply.github.com> Date: Thu, 5 Mar 2026 16:27:51 +0100 Subject: [PATCH 35/68] ran pre-commit --- .../interpolation/interpolation_fields.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py index 8a8d92318f..49532d5219 100644 --- a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py +++ b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py @@ -1258,14 +1258,16 @@ def compute_lsq_coeffs( cc_cv = (cell_center_x[jc], cell_center_y[jc]) for js in range(lsq_dim_stencil): - cc_cell[js, :] = array_ns.asarray(diff_on_edges_torus_numpy( - cell_center_x[jc], - cell_center_y[jc], - cell_center_x[ilc_s][js], - cell_center_y[ilc_s][js], - domain_length, - domain_height, - )) + cc_cell[js, :] = array_ns.asarray( + diff_on_edges_torus_numpy( + cell_center_x[jc], + cell_center_y[jc], + cell_center_x[ilc_s][js], + cell_center_y[ilc_s][js], + domain_length, + domain_height, + ) + ) z_dist_g[jc, :, :] = cc_cell - cc_cv for jc in range(start_idx, min_rlcell_int): From 6362e62be71d482a25e98b8584e1c2abc682d310 Mon Sep 17 00:00:00 2001 From: Nicoletta Farabullini <41536517+nfarabullini@users.noreply.github.com> Date: Fri, 6 Mar 2026 08:49:08 +0100 Subject: [PATCH 36/68] small fix to tuple --- .../icon4py/model/common/interpolation/interpolation_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py index 49532d5219..7b67e7c893 100644 --- a/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py +++ b/model/common/src/icon4py/model/common/interpolation/interpolation_fields.py @@ -1256,7 +1256,7 @@ def compute_lsq_coeffs( ilc_s = c2e2c[jc, :lsq_dim_stencil] cc_cell = array_ns.zeros((lsq_dim_stencil, 2)) - cc_cv = (cell_center_x[jc], cell_center_y[jc]) + cc_cv = array_ns.asarray((cell_center_x[jc], cell_center_y[jc])) for js in range(lsq_dim_stencil): cc_cell[js, :] = array_ns.asarray( diff_on_edges_torus_numpy( From 000efca9298cddf0a14ff61251dd5cf4259b405b Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 12 Mar 2026 15:30:45 +0100 Subject: [PATCH 37/68] Fix numpy/cupy inconsistency in test_parallel_grid_manager.py --- .../mpi_tests/test_parallel_grid_manager.py | 19 +++++++++++++------ .../tests/common/grid/mpi_tests/utils.py | 11 ++++++++--- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 70c61ba350..b416c8f7ec 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -137,10 +137,10 @@ def check_local_global_field( if check_halos: np.testing.assert_allclose( global_reference_field[ - decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO) + data_alloc.as_numpy(decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO)) ], local_field[ - decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO) + data_alloc.as_numpy(decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO)) ], atol=1e-9, verbose=True, @@ -150,7 +150,7 @@ def check_local_global_field( # field, by gathering owned entries to the first rank. This ensures that in # total we have the full global field distributed on all ranks. owned_entries = local_field[ - decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED) + data_alloc.as_numpy(decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED)) ] gathered_sizes, gathered_field = gather_field(owned_entries, processor_props) @@ -262,11 +262,13 @@ def test_geometry_fields_compare_single_multi_rank( if attrs_name in embedded_broken_fields and test_utils.is_embedded(backend): pytest.xfail(f"Field {attrs_name} can't be computed with the embedded backend") + allocator = model_backends.get_allocator(backend) + # TODO(msimberg): Add fixtures for single/multi-rank # grid/geometry/interpolation/metrics factories. grid_file = grid_utils._download_grid_file(grid_description) _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks") - single_rank_grid_manager = utils.run_grid_manager_for_single_rank(grid_file) + single_rank_grid_manager = utils.run_grid_manager_for_single_rank(grid_file, allocator=allocator) single_rank_geometry = geometry.GridGeometry( backend=backend, grid=single_rank_grid_manager.grid, @@ -283,6 +285,7 @@ def test_geometry_fields_compare_single_multi_rank( file=grid_file, run_properties=processor_props, decomposer=decomp.MetisDecomposer(), + allocator=allocator, ) _log.info( f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}" @@ -359,9 +362,11 @@ def test_interpolation_fields_compare_single_multi_rank( if attrs_name in embedded_broken_fields and test_utils.is_embedded(backend): pytest.xfail(f"Field {attrs_name} can't be computed with the embedded backend") + allocator = model_backends.get_allocator(backend) + file = grid_utils.resolve_full_grid_file_name(experiment.grid) _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks") - single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file) + single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, allocator=allocator) single_rank_geometry = geometry.GridGeometry( backend=backend, grid=single_rank_grid_manager.grid, @@ -386,6 +391,7 @@ def test_interpolation_fields_compare_single_multi_rank( file=file, run_properties=processor_props, decomposer=decomp.MetisDecomposer(), + allocator=allocator, ) _log.info( f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}" @@ -535,7 +541,7 @@ def test_metrics_fields_compare_single_multi_rank( ) _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks") - single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, experiment.num_levels) + single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, experiment.num_levels, allocator=allocator) single_rank_geometry = geometry.GridGeometry( backend=backend, grid=single_rank_grid_manager.grid, @@ -584,6 +590,7 @@ def test_metrics_fields_compare_single_multi_rank( run_properties=processor_props, decomposer=decomp.MetisDecomposer(), num_levels=experiment.num_levels, + allocator=allocator, ) _log.info( f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}" diff --git a/model/common/tests/common/grid/mpi_tests/utils.py b/model/common/tests/common/grid/mpi_tests/utils.py index 511ec82f77..c94eeaa3af 100644 --- a/model/common/tests/common/grid/mpi_tests/utils.py +++ b/model/common/tests/common/grid/mpi_tests/utils.py @@ -8,6 +8,8 @@ import pathlib +import gt4py.next as gtx + from icon4py.model.common.decomposition import decomposer as decomp, definitions as decomp_defs from icon4py.model.common.grid import grid_manager as gm, vertical as v_grid @@ -22,14 +24,16 @@ def _grid_manager(file: pathlib.Path, num_levels: int) -> gm.GridManager: def run_grid_manager_for_single_rank( - file: pathlib.Path, num_levels: int = NUM_LEVELS + file: pathlib.Path, + allocator: gtx.typing.Allocator, + num_levels: int = NUM_LEVELS, ) -> gm.GridManager: manager = _grid_manager(file, num_levels) manager( keep_skip_values=True, run_properties=decomp_defs.SingleNodeProcessProperties(), decomposer=decomp.SingleNodeDecomposer(), - allocator=None, + allocator=allocator, ) return manager @@ -38,10 +42,11 @@ def run_grid_manager_for_multi_rank( file: pathlib.Path, run_properties: decomp_defs.ProcessProperties, decomposer: decomp.Decomposer, + allocator: gtx.typing.Allocator, num_levels: int = NUM_LEVELS, ) -> gm.GridManager: manager = _grid_manager(file, num_levels) manager( - keep_skip_values=True, allocator=None, run_properties=run_properties, decomposer=decomposer + keep_skip_values=True, allocator=allocator, run_properties=run_properties, decomposer=decomposer ) return manager From b0c8f5e674f38a09873e4b878f843601113e748a Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 12 Mar 2026 15:57:24 +0100 Subject: [PATCH 38/68] Loosen rbf tolerance again for gpu --- .../tests/common/grid/mpi_tests/test_parallel_grid_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index b416c8f7ec..da82327a0d 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -177,7 +177,8 @@ def check_local_global_field( f" rank = {processor_props.rank}: SHAPES: global reference field {global_reference_field.shape}, gathered = {gathered_field.shape}" ) - np.testing.assert_allclose(sorted_, global_reference_field, atol=1e-9, verbose=True) + # TODO(msimberg): The tolerance is high only for RBF fields. Fix it. + np.testing.assert_allclose(sorted_, global_reference_field, atol=3e-9, verbose=True) # These fields can't be computed with the embedded backend for one reason or From f7f7dcdfb39339ad5892bd9959e94ff70f831ba5 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 12 Mar 2026 16:16:38 +0100 Subject: [PATCH 39/68] Fix allocator argument --- .../tests/common/grid/mpi_tests/test_parallel_grid_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index da82327a0d..14183a137f 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -542,7 +542,7 @@ def test_metrics_fields_compare_single_multi_rank( ) _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks") - single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, experiment.num_levels, allocator=allocator) + single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, allocator=allocator, num_levels=experiment.num_levels) single_rank_geometry = geometry.GridGeometry( backend=backend, grid=single_rank_grid_manager.grid, @@ -590,8 +590,8 @@ def test_metrics_fields_compare_single_multi_rank( file=file, run_properties=processor_props, decomposer=decomp.MetisDecomposer(), - num_levels=experiment.num_levels, allocator=allocator, + num_levels=experiment.num_levels, ) _log.info( f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}" From 17443181290dd7b4e0073a81b3c5c1fca8a8e761 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 16 Mar 2026 14:50:44 +0100 Subject: [PATCH 40/68] Specify backend for all metrics fields --- .../src/icon4py/model/common/metrics/metrics_factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model/common/src/icon4py/model/common/metrics/metrics_factory.py b/model/common/src/icon4py/model/common/metrics/metrics_factory.py index 1cc16324a3..f74180348d 100644 --- a/model/common/src/icon4py/model/common/metrics/metrics_factory.py +++ b/model/common/src/icon4py/model/common/metrics/metrics_factory.py @@ -870,7 +870,7 @@ def _register_computed_fields(self) -> None: # noqa: PLR0915 [too-many-statemen self.register_provider(compute_maxslp_maxhgtd) compute_weighted_cell_neighbor_sum = factory.ProgramFieldProvider( - func=mf.compute_weighted_cell_neighbor_sum, + func=mf.compute_weighted_cell_neighbor_sum.with_backend(self._backend), deps={ "maxslp": attrs.MAXSLP, "maxhgtd": attrs.MAXHGTD, @@ -966,7 +966,7 @@ def _register_computed_fields(self) -> None: # noqa: PLR0915 [too-many-statemen self.register_provider(compute_diffusion_intcoef_and_vertoffset) compute_advection_deepatmo_fields = factory.ProgramFieldProvider( - func=compute_advection_metrics.compute_advection_deepatmo_fields, + func=compute_advection_metrics.compute_advection_deepatmo_fields.with_backend(self._backend), domain={ dims.KDim: ( vertical_domain(v_grid.Zone.TOP), From 599505872c3513179fd50d94d098f3d018852053 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 16 Mar 2026 17:34:30 +0100 Subject: [PATCH 41/68] Add missing allocator to test_parallel_grid_refinement.py --- .../common/grid/mpi_tests/test_parallel_grid_refinement.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py index ce8984e071..80d1039154 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py @@ -11,7 +11,7 @@ import gt4py.next as gtx import pytest -from icon4py.model.common import dimension as dims +from icon4py.model.common import dimension as dims, model_backends from icon4py.model.common.decomposition import ( decomposer as decomp, definitions as decomposition, @@ -123,6 +123,7 @@ def test_bounds_decomposition( file=file, run_properties=processor_props, decomposer=decomp.MetisDecomposer(), + allocator=model_backends.get_allocator(backend), ) _log.info( f"rank = {processor_props.rank} : {grid_manager.decomposition_info.get_horizontal_size()!r}" From 2ff0109c88ac1e2de163518312812d4876e2e0df Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 17 Mar 2026 11:05:13 +0100 Subject: [PATCH 42/68] Add another missing allocator --- .../tests/common/grid/mpi_tests/test_parallel_grid_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 14183a137f..940053e1dc 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -725,6 +725,7 @@ def test_metrics_mask_prog_halo_c( run_properties=processor_props, decomposer=decomp.MetisDecomposer(), num_levels=experiment.num_levels, + allocator=model_backends.get_allocator(backend), ) _log.info( f"rank = {processor_props.rank} : {multi_rank_grid_manager.decomposition_info.get_horizontal_size()!r}" From f802f985a2d05ea5fa2597d4b4f90ac5605a92ab Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 17 Mar 2026 11:08:13 +0100 Subject: [PATCH 43/68] More allocators --- .../tests/common/grid/mpi_tests/test_parallel_grid_manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 940053e1dc..486bb3889b 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -809,6 +809,7 @@ def test_metrics_mask_prog_halo_c( def test_validate_skip_values_in_distributed_connectivities( processor_props: decomp_defs.ProcessProperties, experiment: test_defs.Experiment, + backend: gtx_typing.Backend | None, ) -> None: if experiment == test_defs.Experiments.MCH_CH_R04B09: pytest.xfail("Limited-area grids not yet supported") @@ -818,6 +819,7 @@ def test_validate_skip_values_in_distributed_connectivities( file=file, run_properties=processor_props, decomposer=decomp.MetisDecomposer(), + allocator=model_backends.get_allocator(backend), ) distributed_grid = multi_rank_grid_manager.grid for k, c in distributed_grid.connectivities.items(): @@ -841,6 +843,7 @@ def test_validate_skip_values_in_distributed_connectivities( def test_limited_area_raises( processor_props: decomp_defs.ProcessProperties, grid: test_defs.GridDescription, + backend: gtx_typing.Backend | None, ) -> None: with pytest.raises( NotImplementedError, match="Limited-area grids are not supported in distributed runs" @@ -849,4 +852,5 @@ def test_limited_area_raises( file=grid_utils.resolve_full_grid_file_name(grid), run_properties=processor_props, decomposer=decomp.MetisDecomposer(), + allocator=model_backends.get_allocator(backend), ) From 8d78f0ba4bcdb29ed4b4e0a2424296999d6a1774 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 17 Mar 2026 11:24:01 +0100 Subject: [PATCH 44/68] Increase timeout in distributed tests --- ci/distributed.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index d1f93c5309..aad1bd3795 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -96,9 +96,9 @@ build_distributed: - if: $COMPONENT == 'atmosphere/dycore' && ($BACKEND == 'dace_cpu' || $BACKEND == 'dace_gpu') variables: SLURM_TIMELIMIT: '00:30:00' - - if: $COMPONENT == 'common' && $BACKEND == 'dace_gpu' + - if: $COMPONENT == 'common' && ($BACKEND == 'dace_gpu' || $BACKEND == 'gtfn_gpu') variables: - SLURM_TIMELIMIT: '00:45:00' + SLURM_TIMELIMIT: '00:60:00' - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' From addef83614771ae7d2256966faffd2d4b8c02908 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 17 Mar 2026 12:02:51 +0100 Subject: [PATCH 45/68] Format files --- .../model/common/metrics/metrics_factory.py | 4 +++- .../mpi_tests/test_parallel_grid_manager.py | 24 +++++++++++++++---- .../tests/common/grid/mpi_tests/utils.py | 5 +++- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/model/common/src/icon4py/model/common/metrics/metrics_factory.py b/model/common/src/icon4py/model/common/metrics/metrics_factory.py index f74180348d..1f96d4230d 100644 --- a/model/common/src/icon4py/model/common/metrics/metrics_factory.py +++ b/model/common/src/icon4py/model/common/metrics/metrics_factory.py @@ -966,7 +966,9 @@ def _register_computed_fields(self) -> None: # noqa: PLR0915 [too-many-statemen self.register_provider(compute_diffusion_intcoef_and_vertoffset) compute_advection_deepatmo_fields = factory.ProgramFieldProvider( - func=compute_advection_metrics.compute_advection_deepatmo_fields.with_backend(self._backend), + func=compute_advection_metrics.compute_advection_deepatmo_fields.with_backend( + self._backend + ), domain={ dims.KDim: ( vertical_domain(v_grid.Zone.TOP), diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 486bb3889b..3c042df440 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -137,10 +137,18 @@ def check_local_global_field( if check_halos: np.testing.assert_allclose( global_reference_field[ - data_alloc.as_numpy(decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO)) + data_alloc.as_numpy( + decomposition_info.global_index( + dim, decomp_defs.DecompositionInfo.EntryType.HALO + ) + ) ], local_field[ - data_alloc.as_numpy(decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.HALO)) + data_alloc.as_numpy( + decomposition_info.local_index( + dim, decomp_defs.DecompositionInfo.EntryType.HALO + ) + ) ], atol=1e-9, verbose=True, @@ -150,7 +158,9 @@ def check_local_global_field( # field, by gathering owned entries to the first rank. This ensures that in # total we have the full global field distributed on all ranks. owned_entries = local_field[ - data_alloc.as_numpy(decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED)) + data_alloc.as_numpy( + decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED) + ) ] gathered_sizes, gathered_field = gather_field(owned_entries, processor_props) @@ -269,7 +279,9 @@ def test_geometry_fields_compare_single_multi_rank( # grid/geometry/interpolation/metrics factories. grid_file = grid_utils._download_grid_file(grid_description) _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks") - single_rank_grid_manager = utils.run_grid_manager_for_single_rank(grid_file, allocator=allocator) + single_rank_grid_manager = utils.run_grid_manager_for_single_rank( + grid_file, allocator=allocator + ) single_rank_geometry = geometry.GridGeometry( backend=backend, grid=single_rank_grid_manager.grid, @@ -542,7 +554,9 @@ def test_metrics_fields_compare_single_multi_rank( ) _log.info(f"running on {processor_props.comm} with {processor_props.comm_size} ranks") - single_rank_grid_manager = utils.run_grid_manager_for_single_rank(file, allocator=allocator, num_levels=experiment.num_levels) + single_rank_grid_manager = utils.run_grid_manager_for_single_rank( + file, allocator=allocator, num_levels=experiment.num_levels + ) single_rank_geometry = geometry.GridGeometry( backend=backend, grid=single_rank_grid_manager.grid, diff --git a/model/common/tests/common/grid/mpi_tests/utils.py b/model/common/tests/common/grid/mpi_tests/utils.py index c94eeaa3af..ca771e1d3c 100644 --- a/model/common/tests/common/grid/mpi_tests/utils.py +++ b/model/common/tests/common/grid/mpi_tests/utils.py @@ -47,6 +47,9 @@ def run_grid_manager_for_multi_rank( ) -> gm.GridManager: manager = _grid_manager(file, num_levels) manager( - keep_skip_values=True, allocator=allocator, run_properties=run_properties, decomposer=decomposer + keep_skip_values=True, + allocator=allocator, + run_properties=run_properties, + decomposer=decomposer, ) return manager From 5d97f1da73cdc1f02676f151b240487948d8feb7 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 17 Mar 2026 14:00:00 +0100 Subject: [PATCH 46/68] Increase timelimit further --- ci/distributed.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index aad1bd3795..a023be0c21 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -98,7 +98,8 @@ build_distributed: SLURM_TIMELIMIT: '00:30:00' - if: $COMPONENT == 'common' && ($BACKEND == 'dace_gpu' || $BACKEND == 'gtfn_gpu') variables: - SLURM_TIMELIMIT: '00:60:00' + # TODO(msimberg): This is very long, can we do better? + SLURM_TIMELIMIT: '01:30:00' - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' From 9b153ff9fe7e6f98407bc909de712e7927d85f5c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 18 Mar 2026 15:43:10 +0100 Subject: [PATCH 47/68] More consistency for cupy/numpy, use cupy more extensively in serialbox.py --- .../mpi_tests/test_parallel_grid_manager.py | 12 ++-- .../test_parallel_grid_refinement.py | 6 +- .../src/icon4py/model/testing/serialbox.py | 57 +++++++++---------- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 3c042df440..4c567629c6 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -798,22 +798,22 @@ def test_metrics_mask_prog_halo_c( ) attrs_name = metrics_attributes.MASK_PROG_HALO_C - field = multi_rank_metrics.get(attrs_name).asnumpy() - c_refin_ctrl = multi_rank_metrics.get("c_refin_ctrl").asnumpy() - assert not np.any( + field = multi_rank_metrics.get(attrs_name).ndarray + c_refin_ctrl = multi_rank_metrics.get("c_refin_ctrl").ndarray + assert not ( field[ multi_rank_grid_manager.decomposition_info.local_index( dims.CellDim, decomp_defs.DecompositionInfo.EntryType.OWNED ) ] - ), f"rank={processor_props.rank} - found nonzero in owned entries of {attrs_name}" + ).any(), f"rank={processor_props.rank} - found nonzero in owned entries of {attrs_name}" halo_indices = multi_rank_grid_manager.decomposition_info.local_index( dims.CellDim, decomp_defs.DecompositionInfo.EntryType.HALO ) - assert np.all( + assert ( field[halo_indices] == ~((c_refin_ctrl[halo_indices] >= 1) & (c_refin_ctrl[halo_indices] <= 4)) - ), f"rank={processor_props.rank} - halo for MASK_PROG_HALO_C is incorrect" + ).all(), f"rank={processor_props.rank} - halo for MASK_PROG_HALO_C is incorrect" _log.info(f"rank = {processor_props.rank} - DONE") diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py index 80d1039154..a953113b47 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py @@ -18,6 +18,7 @@ mpi_decomposition, ) from icon4py.model.common.grid import grid_refinement, horizontal as h_grid +from icon4py.model.common.utils import data_allocation as data_alloc from icon4py.model.testing import definitions, grid_utils, serialbox, test_utils from icon4py.model.testing.fixtures.datatest import ( backend, @@ -65,6 +66,7 @@ def test_compute_domain_bounds( experiment: definitions.Experiment, grid_savepoint: serialbox.IconGridSavepoint, processor_props: decomposition.ProcessProperties, + backend: gtx.typing.Backend | None, ) -> None: if ( processor_props.is_single_rank() @@ -75,11 +77,11 @@ def test_compute_domain_bounds( "end index data for single node APE are all 0 - re- serialization should fix that (patch%cells%end_index vs patch%cells%end_idx)" ) - ref_grid = grid_savepoint.construct_icon_grid(backend=None, keep_skip_values=True) + ref_grid = grid_savepoint.construct_icon_grid(backend=backend, keep_skip_values=True) decomposition_info = grid_savepoint.construct_decomposition_info() refin_ctrl = {dim: grid_savepoint.refin_ctrl(dim) for dim in utils.main_horizontal_dims()} start_indices, end_indices = grid_refinement.compute_domain_bounds( - dim, refin_ctrl, decomposition_info + dim, refin_ctrl, decomposition_info, array_ns=data_alloc.import_array_ns(backend), ) if ( experiment == definitions.Experiments.GAUSS3D diff --git a/model/testing/src/icon4py/model/testing/serialbox.py b/model/testing/src/icon4py/model/testing/serialbox.py index 84fc38a962..66c1149014 100644 --- a/model/testing/src/icon4py/model/testing/serialbox.py +++ b/model/testing/src/icon4py/model/testing/serialbox.py @@ -11,7 +11,6 @@ import gt4py.next as gtx import gt4py.next.typing as gtx_typing -import numpy as np import serialbox import icon4py.model.common.decomposition.definitions as decomposition @@ -85,7 +84,7 @@ def log_meta_info(self): self.log.info(self.savepoint.metainfo) def _get_field(self, name, *dimensions, dtype=float): - buffer = np.squeeze(self.serializer.read(name, self.savepoint).astype(dtype)) + buffer = self.xp.squeeze(self.serializer.read(name, self.savepoint).astype(dtype)) buffer = self._reduce_to_dim_size(buffer, dimensions) self.log.debug(f"{name} {buffer.shape}") @@ -93,7 +92,7 @@ def _get_field(self, name, *dimensions, dtype=float): def _get_field_component(self, name: str, level: int, dims: tuple[gtx.Dimension, gtx]): buffer = self.serializer.read(name, self.savepoint).astype(float) - buffer = np.squeeze(buffer)[:, :, level] + buffer = self.xp.squeeze(buffer)[:, :, level] buffer = self._reduce_to_dim_size(buffer, dims) self.log.debug(f"{name} {buffer.shape}") return gtx.as_field(dims, buffer, allocator=self.backend) @@ -136,7 +135,7 @@ def _read_bool(self, name: str): return self._read(name, offset=0, dtype=bool) def _read(self, name: str, offset=0, dtype=int): - return np.squeeze(self.serializer.read(name, self.savepoint) - offset).astype(dtype) + return self.xp.asarray(self.xp.squeeze(self.serializer.read(name, self.savepoint) - offset).astype(dtype)) class IconGridSavepoint(IconSavepoint): @@ -365,35 +364,35 @@ def edge_cell_length(self): def cells_start_index(self): start_idx = self._read_int32("c_start_index") - return np.where(start_idx == 0, start_idx, start_idx - 1) + return self.xp.where(start_idx == 0, start_idx, start_idx - 1) def cells_end_index(self): return self._read_int32("c_end_index") def vertex_start_index(self): start_idx = self._read_int32("v_start_index") - return np.where(start_idx == 0, start_idx, start_idx - 1) + return self.xp.where(start_idx == 0, start_idx, start_idx - 1) def vertex_end_index(self): return self._read_int32("v_end_index") def edge_start_index(self): start_idx = self._read_int32("e_start_index") - return np.where(start_idx == 0, start_idx, start_idx - 1) + return self.xp.where(start_idx == 0, start_idx, start_idx - 1) def edge_end_index(self): # don't need to subtract 1, because FORTRAN slices are inclusive [from:to] so the being # one off accounts for being exclusive [from:to) return self._read_int32("e_end_index") - def start_index(self) -> dict[gtx.Dimension, np.ndarray]: + def start_index(self) -> dict[gtx.Dimension, data_alloc.NDArray]: return { dims.CellDim: self.cells_start_index(), dims.EdgeDim: self.edge_start_index(), dims.VertexDim: self.vertex_start_index(), } - def end_index(self) -> dict[gtx.Dimension, np.ndarray]: + def end_index(self) -> dict[gtx.Dimension, data_alloc.NDArray]: return { dims.CellDim: self.cells_end_index(), dims.EdgeDim: self.edge_end_index(), @@ -426,7 +425,7 @@ def c2e(self): def _get_connectivity_array(self, name: str, target_dim: gtx.Dimension, reverse: bool = False): if reverse: - connectivity = np.transpose(self._read_int32(name, offset=1))[ + connectivity = self.xp.transpose(self._read_int32(name, offset=1))[ : self.sizes[target_dim], : ] else: @@ -442,7 +441,7 @@ def e2c2e(self): def c2e2c2e(self): if self._c2e2c2e() is None: - return np.zeros((self.sizes[dims.CellDim], 9), dtype=gtx.int32) + return self.xp.zeros((self.sizes[dims.CellDim], 9), dtype=gtx.int32) else: return self._c2e2c2e() @@ -501,7 +500,7 @@ def _read_field_for_dim(field_name, read_func, dim: gtx.Dimension): ) def owner_mask(self, dim: gtx.Dimension): - return np.squeeze(self._read_field_for_dim("owner_mask", self._read_bool, dim)) + return self.xp.squeeze(self._read_field_for_dim("owner_mask", self._read_bool, dim)) def global_index(self, dim: gtx.Dimension): return self._read_field_for_dim("glb_index", self._read_int32_shift1, dim) @@ -552,8 +551,8 @@ def potentially_revert_icon_index_transformation(ar): c2e2c = self.c2e2c() e2c2e = potentially_revert_icon_index_transformation(self.e2c2e()) - c2e2c0 = np.column_stack((range(c2e2c.shape[0]), c2e2c)) - e2c2e0 = np.column_stack((range(e2c2e.shape[0]), e2c2e)) + c2e2c0 = self.xp.column_stack((self.xp.asarray(range(c2e2c.shape[0])), c2e2c)) + e2c2e0 = self.xp.column_stack((self.xp.asarray(range(e2c2e.shape[0])), e2c2e)) constructor = functools.partial( h_grid.get_start_end_idx_from_icon_arrays, @@ -648,7 +647,7 @@ def geofac_grdiv(self): return self._get_field("geofac_grdiv", dims.EdgeDim, dims.E2C2EODim) def geofac_grg(self): - grg = np.squeeze(self.serializer.read("geofac_grg", self.savepoint)) + grg = self.xp.squeeze(self.serializer.read("geofac_grg", self.savepoint)) num_cells = self.sizes[dims.CellDim] return gtx.as_field( (dims.CellDim, dims.C2E2CODim), grg[:num_cells, :, 0], allocator=self.backend @@ -677,7 +676,7 @@ def rbf_vec_coeff_e(self): @IconSavepoint.optionally_registered() def rbf_vec_coeff_c1(self): dimensions = (dims.CellDim, dims.C2E2C2EDim) - buffer = np.squeeze( + buffer = self.xp.squeeze( self.serializer.read("rbf_vec_coeff_c1", self.savepoint).astype(float) ).transpose() buffer = self._reduce_to_dim_size(buffer, dimensions) @@ -686,7 +685,7 @@ def rbf_vec_coeff_c1(self): @IconSavepoint.optionally_registered() def rbf_vec_coeff_c2(self): dimensions = (dims.CellDim, dims.C2E2C2EDim) - buffer = np.squeeze( + buffer = self.xp.squeeze( self.serializer.read("rbf_vec_coeff_c2", self.savepoint).astype(float) ).transpose() buffer = self._reduce_to_dim_size(buffer, dimensions) @@ -739,15 +738,15 @@ def mask_prog_halo_c(self): @IconSavepoint.optionally_registered() def pg_edgeidx(self): - return np.squeeze(self.serializer.read("pg_edgeidx", self.savepoint)) + return self.xp.squeeze(self.serializer.read("pg_edgeidx", self.savepoint)) @IconSavepoint.optionally_registered() def pg_vertidx(self): - return np.squeeze(self.serializer.read("pg_vertidx", self.savepoint)) + return self.xp.squeeze(self.serializer.read("pg_vertidx", self.savepoint)) @IconSavepoint.optionally_registered() def pg_exdist(self): - return np.squeeze(self.serializer.read("pg_exdist", self.savepoint)) + return self.xp.squeeze(self.serializer.read("pg_exdist", self.savepoint)) def pg_exdist_dsl(self): pg_edgeidx = self.pg_edgeidx() @@ -891,12 +890,12 @@ def geopot(self): @IconSavepoint.optionally_registered() def zd_cellidx(self): - return np.squeeze(self.serializer.read("zd_cellidx", self.savepoint)) + return self.xp.squeeze(self.serializer.read("zd_cellidx", self.savepoint)) @IconSavepoint.optionally_registered() def zd_vertidx(self): # this is the k list (with fortran 1-based indexing) for the central point of the C2E2C stencil - return np.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[0, :] + return self.xp.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[0, :] @IconSavepoint.optionally_registered(dims.CellDim, dims.C2E2CDim, dims.KDim, dtype=gtx.int32) def zd_vertoffset(self): @@ -904,7 +903,7 @@ def zd_vertoffset(self): zd_vertidx = self.zd_vertidx() # these are the three k offsets for the C2E2C neighbors zd_vertoffset = ( - np.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[1:, :] - zd_vertidx + self.xp.squeeze(self.serializer.read("zd_vertidx", self.savepoint))[1:, :] - zd_vertidx ) cell_c2e2c_k_domain = gtx.domain( { @@ -929,7 +928,7 @@ def zd_vertoffset(self): def zd_intcoef(self): zd_cellidx = self.zd_cellidx() zd_vertidx = self.zd_vertidx() - zd_intcoef = np.squeeze(self.serializer.read("zd_intcoef", self.savepoint)) + zd_intcoef = self.xp.squeeze(self.serializer.read("zd_intcoef", self.savepoint)) cell_c2e2c_k_domain = gtx.domain( { dims.CellDim: self.theta_ref_mc().domain[dims.CellDim].unit_range, @@ -953,7 +952,7 @@ def zd_intcoef(self): def zd_diffcoef(self): zd_cellidx = self.zd_cellidx() zd_vertidx = self.zd_vertidx() - zd_diffcoef = np.squeeze(self.serializer.read("zd_diffcoef", self.savepoint)) + zd_diffcoef = self.xp.squeeze(self.serializer.read("zd_diffcoef", self.savepoint)) return data_alloc.list2field( domain=self.geopot().domain, values=zd_diffcoef, @@ -1030,16 +1029,16 @@ def exner(self): return self._get_field("exner", dims.CellDim, dims.KDim) def diff_multfac_smag(self): - return np.squeeze(self.serializer.read("diff_multfac_smag", self.savepoint)) + return self.xp.squeeze(self.serializer.read("diff_multfac_smag", self.savepoint)) def enh_smag_fac(self): - return np.squeeze(self.serializer.read("enh_smag_fac", self.savepoint)) + return self.xp.squeeze(self.serializer.read("enh_smag_fac", self.savepoint)) def smag_limit(self): - return np.squeeze(self.serializer.read("smag_limit", self.savepoint)) + return self.xp.squeeze(self.serializer.read("smag_limit", self.savepoint)) def diff_multfac_n2w(self): - return np.squeeze(self.serializer.read("diff_multfac_n2w", self.savepoint)) + return self.xp.squeeze(self.serializer.read("diff_multfac_n2w", self.savepoint)) def nudgezone_diff(self) -> int: return self.serializer.read("nudgezone_diff", self.savepoint)[0] From ea3304fe741a369e4cb58d5efd80ace6f296249c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 18 Mar 2026 15:55:30 +0100 Subject: [PATCH 48/68] Very long distributed gpu ci time limit --- ci/distributed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index a023be0c21..cfb8825d63 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -99,7 +99,7 @@ build_distributed: - if: $COMPONENT == 'common' && ($BACKEND == 'dace_gpu' || $BACKEND == 'gtfn_gpu') variables: # TODO(msimberg): This is very long, can we do better? - SLURM_TIMELIMIT: '01:30:00' + SLURM_TIMELIMIT: '03:00:00' - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' From 89424c98da3e5cc533c768f06750140249cb7594 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 18 Mar 2026 16:14:08 +0100 Subject: [PATCH 49/68] Move check_local_global_field helper to common file for reuse elsewhere --- .../mpi_tests/test_parallel_grid_manager.py | 119 +---------------- .../icon4py/model/testing/parallel_helpers.py | 120 +++++++++++++++++- 2 files changed, 121 insertions(+), 118 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 4c2100a931..1fb5869a5b 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -5,9 +5,7 @@ # # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause -import functools import logging -import operator import numpy as np import pytest @@ -33,7 +31,7 @@ from icon4py.model.common.metrics import metrics_attributes, metrics_factory from icon4py.model.common.states import utils as state_utils from icon4py.model.common.utils import data_allocation as data_alloc -from icon4py.model.testing import definitions as test_defs, grid_utils, test_utils +from icon4py.model.testing import definitions as test_defs, grid_utils, parallel_helpers, test_utils from icon4py.model.testing.fixtures.datatest import ( backend, experiment, @@ -85,115 +83,6 @@ def _get_neighbor_tables(grid: base.Grid) -> dict: } -def gather_field(field: np.ndarray, props: decomp_defs.ProcessProperties) -> tuple: - constant_dims = tuple(field.shape[1:]) - _log.info(f"gather_field on rank={props.rank} - gathering field of local shape {field.shape}") - # Because of sparse indexing the field may have a non-contigous layout, - # which Gatherv doesn't support. Make sure the field is contiguous. - field = np.ascontiguousarray(field) - constant_length = functools.reduce(operator.mul, constant_dims, 1) - local_sizes = np.array(props.comm.gather(field.size, root=0)) - if props.rank == 0: - recv_buffer = np.empty(np.sum(local_sizes), dtype=field.dtype) - _log.info( - f"gather_field on rank = {props.rank} - setup receive buffer with size {sum(local_sizes)} on rank 0" - ) - else: - recv_buffer = None - - props.comm.Gatherv(sendbuf=field, recvbuf=(recv_buffer, local_sizes), root=0) - if props.rank == 0: - local_first_dim = tuple(sz // constant_length for sz in local_sizes) - _log.info( - f" gather_field on rank = 0: computed local dims {local_first_dim} - constant dims {constant_dims}" - ) - gathered_field = recv_buffer.reshape((-1, *constant_dims)) # type: ignore [union-attr] - else: - gathered_field = None - local_first_dim = field.shape - return local_first_dim, gathered_field - - -def check_local_global_field( - decomposition_info: decomp_defs.DecompositionInfo, - processor_props: decomp_defs.ProcessProperties, # F811 # fixture - dim: gtx.Dimension, - global_reference_field: np.ndarray, - local_field: np.ndarray, - check_halos: bool, -) -> None: - if dim == dims.KDim: - np.testing.assert_allclose(global_reference_field, local_field) - return - - _log.info( - f" rank= {processor_props.rank}/{processor_props.comm_size}----exchanging field of main dim {dim}" - ) - assert ( - local_field.shape[0] - == decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.ALL).shape[ - 0 - ] - ) - - # Compare halo against global reference field - if check_halos: - np.testing.assert_allclose( - global_reference_field[ - data_alloc.as_numpy( - decomposition_info.global_index( - dim, decomp_defs.DecompositionInfo.EntryType.HALO - ) - ) - ], - local_field[ - data_alloc.as_numpy( - decomposition_info.local_index( - dim, decomp_defs.DecompositionInfo.EntryType.HALO - ) - ) - ], - atol=1e-9, - verbose=True, - ) - - # Compare owned local field, excluding halos, against global reference - # field, by gathering owned entries to the first rank. This ensures that in - # total we have the full global field distributed on all ranks. - owned_entries = local_field[ - data_alloc.as_numpy( - decomposition_info.local_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED) - ) - ] - gathered_sizes, gathered_field = gather_field(owned_entries, processor_props) - - global_index_sizes, gathered_global_indices = gather_field( - decomposition_info.global_index(dim, decomp_defs.DecompositionInfo.EntryType.OWNED), - processor_props, - ) - - if processor_props.rank == 0: - _log.info(f"rank = {processor_props.rank}: asserting gathered fields: ") - - assert np.all( - gathered_sizes == global_index_sizes - ), f"gathered field sizes do not match: {dim} {gathered_sizes} - {global_index_sizes}" - _log.info( - f"rank = {processor_props.rank}: Checking field size on dim ={dim}: --- gathered sizes {gathered_sizes} = {sum(gathered_sizes)}" - ) - _log.info( - f"rank = {processor_props.rank}: --- gathered field has size {gathered_sizes}" - ) - sorted_ = np.zeros(global_reference_field.shape, dtype=gtx.float64) # type: ignore [attr-defined] - sorted_[gathered_global_indices] = gathered_field - _log.info( - f" rank = {processor_props.rank}: SHAPES: global reference field {global_reference_field.shape}, gathered = {gathered_field.shape}" - ) - - # TODO(msimberg): The tolerance is high only for RBF fields. Fix it. - np.testing.assert_allclose(sorted_, global_reference_field, atol=3e-9, verbose=True) - - # These fields can't be computed with the embedded backend for one reason or # another, so we declare them here for xfailing. embedded_broken_fields = { @@ -332,7 +221,7 @@ def test_geometry_fields_compare_single_multi_rank( field = multi_rank_geometry.get(attrs_name) dim = field_ref.domain.dims[0] - check_local_global_field( + parallel_helpers.check_local_global_field( decomposition_info=multi_rank_grid_manager.decomposition_info, processor_props=processor_props, dim=dim, @@ -449,7 +338,7 @@ def test_interpolation_fields_compare_single_multi_rank( field = multi_rank_interpolation.get(attrs_name) dim = field_ref.domain.dims[0] - check_local_global_field( + parallel_helpers.check_local_global_field( decomposition_info=multi_rank_grid_manager.decomposition_info, processor_props=processor_props, dim=dim, @@ -682,7 +571,7 @@ def test_metrics_fields_compare_single_multi_rank( assert isinstance(field, state_utils.ScalarType) assert pytest.approx(field) == field_ref else: - check_local_global_field( + parallel_helpers.check_local_global_field( decomposition_info=multi_rank_grid_manager.decomposition_info, processor_props=processor_props, dim=field_ref.domain.dims[0], diff --git a/model/testing/src/icon4py/model/testing/parallel_helpers.py b/model/testing/src/icon4py/model/testing/parallel_helpers.py index b0ad1b0465..ea6aa1740e 100644 --- a/model/testing/src/icon4py/model/testing/parallel_helpers.py +++ b/model/testing/src/icon4py/model/testing/parallel_helpers.py @@ -5,15 +5,20 @@ # # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause +import functools import logging +import operator +import numpy as np import pytest +from gt4py import next as gtx from icon4py.model.common import dimension as dims from icon4py.model.common.decomposition import definitions +from icon4py.model.common.utils import data_allocation as data_alloc -log = logging.getLogger(__file__) +_log = logging.getLogger(__file__) def check_comm_size( @@ -24,12 +29,121 @@ def check_comm_size( def log_process_properties(props: definitions.ProcessProperties) -> None: - log.info(f"rank={props.rank}/{props.comm_size}") + _log.info(f"rank={props.rank}/{props.comm_size}") def log_local_field_size(decomposition_info: definitions.DecompositionInfo) -> None: - log.info( + _log.info( f"local grid size: cells={decomposition_info.global_index(dims.CellDim).size}, " f"edges={decomposition_info.global_index(dims.EdgeDim).size}, " f"vertices={decomposition_info.global_index(dims.VertexDim).size}" ) + + +def gather_field(field: np.ndarray, props: definitions.ProcessProperties) -> tuple: + constant_dims = tuple(field.shape[1:]) + _log.info(f"gather_field on rank={props.rank} - gathering field of local shape {field.shape}") + # Because of sparse indexing the field may have a non-contigous layout, + # which Gatherv doesn't support. Make sure the field is contiguous. + field = np.ascontiguousarray(field) + constant_length = functools.reduce(operator.mul, constant_dims, 1) + local_sizes = np.array(props.comm.gather(field.size, root=0)) + if props.rank == 0: + recv_buffer = np.empty(np.sum(local_sizes), dtype=field.dtype) + _log.info( + f"gather_field on rank = {props.rank} - setup receive buffer with size {sum(local_sizes)} on rank 0" + ) + else: + recv_buffer = None + + props.comm.Gatherv(sendbuf=field, recvbuf=(recv_buffer, local_sizes), root=0) + if props.rank == 0: + local_first_dim = tuple(sz // constant_length for sz in local_sizes) + _log.info( + f" gather_field on rank = 0: computed local dims {local_first_dim} - constant dims {constant_dims}" + ) + gathered_field = recv_buffer.reshape((-1, *constant_dims)) # type: ignore [union-attr] + else: + gathered_field = None + local_first_dim = field.shape + return local_first_dim, gathered_field + + +def check_local_global_field( + decomposition_info: definitions.DecompositionInfo, + processor_props: definitions.ProcessProperties, # F811 # fixture + dim: gtx.Dimension, + global_reference_field: np.ndarray, + local_field: np.ndarray, + check_halos: bool, +) -> None: + if dim == dims.KDim: + np.testing.assert_allclose(global_reference_field, local_field) + return + + _log.info( + f" rank= {processor_props.rank}/{processor_props.comm_size}----exchanging field of main dim {dim}" + ) + assert ( + local_field.shape[0] + == decomposition_info.global_index(dim, definitions.DecompositionInfo.EntryType.ALL).shape[ + 0 + ] + ) + + # Compare halo against global reference field + if check_halos: + np.testing.assert_allclose( + global_reference_field[ + data_alloc.as_numpy( + decomposition_info.global_index( + dim, definitions.DecompositionInfo.EntryType.HALO + ) + ) + ], + local_field[ + data_alloc.as_numpy( + decomposition_info.local_index( + dim, definitions.DecompositionInfo.EntryType.HALO + ) + ) + ], + atol=1e-9, + verbose=True, + ) + + # Compare owned local field, excluding halos, against global reference + # field, by gathering owned entries to the first rank. This ensures that in + # total we have the full global field distributed on all ranks. + owned_entries = local_field[ + data_alloc.as_numpy( + decomposition_info.local_index(dim, definitions.DecompositionInfo.EntryType.OWNED) + ) + ] + gathered_sizes, gathered_field = gather_field(owned_entries, processor_props) + + global_index_sizes, gathered_global_indices = gather_field( + data_alloc.as_numpy(decomposition_info.global_index(dim, definitions.DecompositionInfo.EntryType.OWNED)), + processor_props, + ) + + if processor_props.rank == 0: + _log.info(f"rank = {processor_props.rank}: asserting gathered fields: ") + + assert np.all( + gathered_sizes == global_index_sizes + ), f"gathered field sizes do not match: {dim} {gathered_sizes} - {global_index_sizes}" + _log.info( + f"rank = {processor_props.rank}: Checking field size on dim ={dim}: --- gathered sizes {gathered_sizes} = {sum(gathered_sizes)}" + ) + _log.info( + f"rank = {processor_props.rank}: --- gathered field has size {gathered_sizes}" + ) + sorted_ = np.zeros(global_reference_field.shape, dtype=gtx.float64) # type: ignore [attr-defined] + sorted_[gathered_global_indices] = gathered_field + _log.info( + f" rank = {processor_props.rank}: SHAPES: global reference field {global_reference_field.shape}, gathered = {gathered_field.shape}" + ) + + # TODO(msimberg): The tolerance is high only for RBF fields. Fix it. + np.testing.assert_allclose(sorted_, global_reference_field, atol=3e-9, verbose=True) From 79dcdb38090fb9c86a8b5e055adbc9bd8ac19f01 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 18 Mar 2026 16:19:50 +0100 Subject: [PATCH 50/68] Add customizable tolerance to check_local_global_field --- .../common/grid/mpi_tests/test_parallel_grid_manager.py | 3 +++ model/testing/src/icon4py/model/testing/parallel_helpers.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 1fb5869a5b..b64240a9a7 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -228,6 +228,7 @@ def test_geometry_fields_compare_single_multi_rank( global_reference_field=field_ref.asnumpy(), local_field=field.asnumpy(), check_halos=True, + atol=0.0, ) _log.info(f"rank = {processor_props.rank} - DONE") @@ -345,6 +346,7 @@ def test_interpolation_fields_compare_single_multi_rank( global_reference_field=field_ref.asnumpy(), local_field=field.asnumpy(), check_halos=True, + atol=3e-9 if attrs_name.startswith("rbf") else 0.0, ) _log.info(f"rank = {processor_props.rank} - DONE") @@ -578,6 +580,7 @@ def test_metrics_fields_compare_single_multi_rank( global_reference_field=field_ref.asnumpy(), local_field=field.asnumpy(), check_halos=(attrs_name != metrics_attributes.WGTFAC_E), + atol=0.0, ) _log.info(f"rank = {processor_props.rank} - DONE") diff --git a/model/testing/src/icon4py/model/testing/parallel_helpers.py b/model/testing/src/icon4py/model/testing/parallel_helpers.py index ea6aa1740e..8d4a24782f 100644 --- a/model/testing/src/icon4py/model/testing/parallel_helpers.py +++ b/model/testing/src/icon4py/model/testing/parallel_helpers.py @@ -76,6 +76,7 @@ def check_local_global_field( global_reference_field: np.ndarray, local_field: np.ndarray, check_halos: bool, + atol: float, ) -> None: if dim == dims.KDim: np.testing.assert_allclose(global_reference_field, local_field) @@ -108,7 +109,7 @@ def check_local_global_field( ) ) ], - atol=1e-9, + atol=atol, verbose=True, ) @@ -145,5 +146,4 @@ def check_local_global_field( f" rank = {processor_props.rank}: SHAPES: global reference field {global_reference_field.shape}, gathered = {gathered_field.shape}" ) - # TODO(msimberg): The tolerance is high only for RBF fields. Fix it. - np.testing.assert_allclose(sorted_, global_reference_field, atol=3e-9, verbose=True) + np.testing.assert_allclose(sorted_, global_reference_field, atol=atol, verbose=True) From d555a26a9bcdfaf4c709a6a43b3c1ad253574d01 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 19 Mar 2026 11:30:36 +0100 Subject: [PATCH 51/68] numpy/cupy fixes --- .../mpi_tests/test_mpi_decomposition.py | 18 +++---- .../decomposition/unit_tests/test_halo.py | 4 +- .../grid/unit_tests/test_grid_manager.py | 48 +++++++++---------- .../unit_tests/test_rbf_interpolation.py | 6 +-- .../math/unit_tests/test_smagorinsky.py | 5 +- 5 files changed, 42 insertions(+), 39 deletions(-) diff --git a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py index 094fecd132..c2cb0d1ac5 100644 --- a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py +++ b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py @@ -290,9 +290,10 @@ def test_exchange_on_dummy_data( decomposition_info: definitions.DecompositionInfo, grid_savepoint: serialbox.IconGridSavepoint, dimension: gtx.Dimension, + backend: gtx.typing.Backend | None, ) -> None: exchange = definitions.create_exchange(processor_props, decomposition_info) - grid = grid_savepoint.construct_icon_grid() + grid = grid_savepoint.construct_icon_grid(backend=backend) number = processor_props.rank + 10 input_field = data_alloc.constant_field( @@ -300,15 +301,16 @@ def test_exchange_on_dummy_data( number, dimension, dims.KDim, + allocator=backend, ) - halo_points = decomposition_info.local_index( + halo_points = data_alloc.as_numpy(decomposition_info.local_index( dimension, definitions.DecompositionInfo.EntryType.HALO - ) - local_points = decomposition_info.local_index( + )) + local_points = data_alloc.as_numpy(decomposition_info.local_index( dimension, definitions.DecompositionInfo.EntryType.OWNED - ) - assert np.all(input_field.asnumpy() == number) + )) + assert (input_field.ndarray == number).all() exchange.exchange(dimension, input_field, stream=definitions.BLOCK) result = input_field.asnumpy() _log.info(f"rank={processor_props.rank} - num of halo points ={halo_points.shape}") @@ -319,8 +321,8 @@ def test_exchange_on_dummy_data( changed_points = np.argwhere(result[:, 2] != number) _log.info(f"rank={processor_props.rank} - num changed points {changed_points.shape} ") - assert np.all(result[local_points, :] == number) - assert np.all(result[halo_points, :] != number) + assert (result[local_points, :] == number).all() + assert (result[halo_points, :] != number).all() @pytest.mark.mpi diff --git a/model/common/tests/common/decomposition/unit_tests/test_halo.py b/model/common/tests/common/decomposition/unit_tests/test_halo.py index 420f1c20ba..d37a234edc 100644 --- a/model/common/tests/common/decomposition/unit_tests/test_halo.py +++ b/model/common/tests/common/decomposition/unit_tests/test_halo.py @@ -13,6 +13,7 @@ from icon4py.model.common import dimension as dims, exceptions, model_backends from icon4py.model.common.decomposition import decomposer as decomp, definitions, halo from icon4py.model.common.grid import base as base_grid, simple +from icon4py.model.common.utils import data_allocation as data_alloc from icon4py.model.testing import test_utils from ...fixtures import backend_like, processor_props @@ -32,7 +33,8 @@ def test_halo_constructor_owned_cells(rank, simple_neighbor_tables, backend_like run_properties=processor_props, allocator=allocator, ) - my_owned_cells = halo_generator.owned_cells(utils.SIMPLE_DISTRIBUTION) + xp = data_alloc.import_array_ns(allocator) + my_owned_cells = data_alloc.as_numpy(halo_generator.owned_cells(xp.asarray(utils.SIMPLE_DISTRIBUTION))) print(f"rank {processor_props.rank} owns {my_owned_cells} ") assert my_owned_cells.size == len(utils._CELL_OWN[processor_props.rank]) diff --git a/model/common/tests/common/grid/unit_tests/test_grid_manager.py b/model/common/tests/common/grid/unit_tests/test_grid_manager.py index c809414e77..f3e2c4e7e5 100644 --- a/model/common/tests/common/grid/unit_tests/test_grid_manager.py +++ b/model/common/tests/common/grid/unit_tests/test_grid_manager.py @@ -74,7 +74,7 @@ def test_grid_manager_eval_v2e( backend: gtx_typing.Backend, ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - seralized_v2e = grid_savepoint.v2e() + seralized_v2e = data_alloc.as_numpy(grid_savepoint.v2e()) # there are vertices at the boundary of a local domain or at a pentagon point that have less than # 6 neighbors hence there are "Missing values" in the grid file # they get substituted by the "last valid index" in preprocessing step in icon. @@ -120,7 +120,7 @@ def test_grid_manager_eval_v2c( backend: gtx_typing.Backend, ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_v2c = grid_savepoint.v2c() + serialized_v2c = data_alloc.as_numpy(grid_savepoint.v2c()) v2c_table = grid.get_connectivity("V2C").asnumpy() # there are vertices that have less than 6 neighboring cells: either pentagon points or # vertices at the boundary of the domain for a limited area mode @@ -176,7 +176,7 @@ def test_grid_manager_eval_e2v( ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_e2v = grid_savepoint.e2v() + serialized_e2v = data_alloc.as_numpy(grid_savepoint.e2v()) e2v_table = grid.get_connectivity("E2V").asnumpy() # all vertices in the system have to neighboring edges, there no edges that point nowhere # hence this connectivity has no "missing values" in the grid file @@ -199,7 +199,7 @@ def test_grid_manager_eval_e2c( ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_e2c = grid_savepoint.e2c() + serialized_e2c = data_alloc.as_numpy(grid_savepoint.e2c()) e2c_table = grid.get_connectivity("E2C").asnumpy() assert has_invalid_index(serialized_e2c) == grid.limited_area assert has_invalid_index(e2c_table) == grid.limited_area @@ -216,7 +216,7 @@ def test_grid_manager_eval_c2e( ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_c2e = grid_savepoint.c2e() + serialized_c2e = data_alloc.as_numpy(grid_savepoint.c2e()) c2e_table = grid.get_connectivity("C2E").asnumpy() # no cells with less than 3 neighboring edges exist, otherwise the cell is not there in the # first place @@ -237,7 +237,7 @@ def test_grid_manager_eval_c2e2c( grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid assert np.allclose( grid.get_connectivity("C2E2C").asnumpy(), - grid_savepoint.c2e2c(), + data_alloc.as_numpy(grid_savepoint.c2e2c()), ) @@ -249,7 +249,7 @@ def test_grid_manager_eval_c2e2cO( backend: gtx_typing.Backend, ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_grid = grid_savepoint.construct_icon_grid() + serialized_grid = grid_savepoint.construct_icon_grid(backend=backend) assert np.allclose( grid.get_connectivity("C2E2CO").asnumpy(), serialized_grid.get_connectivity("C2E2CO").asnumpy(), @@ -265,7 +265,7 @@ def test_grid_manager_eval_e2c2e( backend: gtx_typing.Backend, ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_grid = grid_savepoint.construct_icon_grid() + serialized_grid = grid_savepoint.construct_icon_grid(backend=backend) serialized_e2c2e = serialized_grid.get_connectivity("E2C2E").asnumpy() serialized_e2c2eO = serialized_grid.get_connectivity("E2C2EO").asnumpy() assert has_invalid_index(serialized_e2c2e) == grid.limited_area @@ -290,7 +290,7 @@ def test_grid_manager_eval_e2c2v( backend: gtx_typing.Backend, ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_ref = grid_savepoint.e2c2v() + serialized_ref = data_alloc.as_numpy(grid_savepoint.e2c2v()) # the "far" (adjacent to edge normal ) is not always there, because ICON only calculates those starting from # (lateral_boundary(dims.EdgeDim) + 1) to end(dims.EdgeDim) (see mo_intp_coeffs.f90) and only for owned cells table = grid.get_connectivity("E2C2V").asnumpy() @@ -311,7 +311,7 @@ def test_grid_manager_eval_c2v( ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid c2v = grid.get_connectivity("C2V").asnumpy() - assert np.allclose(c2v, grid_savepoint.c2v()) + assert np.allclose(c2v, data_alloc.as_numpy(grid_savepoint.c2v())) @pytest.mark.parametrize( @@ -397,7 +397,7 @@ def test_grid_manager_eval_c2e2c2e( backend: gtx_typing.Backend, ) -> None: grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid - serialized_grid = grid_savepoint.construct_icon_grid() + serialized_grid = grid_savepoint.construct_icon_grid(backend=backend) assert np.allclose( grid.get_connectivity("C2E2C2E").asnumpy(), serialized_grid.get_connectivity("C2E2C2E").asnumpy(), @@ -415,7 +415,7 @@ def test_grid_manager_start_end_index_compare_with_serialized_data( dim: gtx.Dimension, backend: gtx_typing.Backend, ) -> None: - serialized_grid = grid_savepoint.construct_icon_grid() + serialized_grid = grid_savepoint.construct_icon_grid(backend=backend) grid = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend).grid for domain in h_grid.get_domains_for_dim(dim): @@ -469,11 +469,11 @@ def test_tangent_orientation( experiment: definitions.Experiment, backend: gtx_typing.Backend, ) -> None: - expected = grid_savepoint.tangent_orientation() + expected = data_alloc.as_numpy(grid_savepoint.tangent_orientation()) manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend) assert test_utils.dallclose( manager.geometry_fields[gridfile.GeometryName.TANGENT_ORIENTATION].asnumpy(), - expected.asnumpy(), + expected, ) @@ -483,11 +483,11 @@ def test_edge_orientation_on_vertex( experiment: definitions.Experiment, backend: gtx_typing.Backend, ) -> None: - expected = grid_savepoint.vertex_edge_orientation() + expected = data_alloc.as_numpy(grid_savepoint.vertex_edge_orientation()) manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend) assert test_utils.dallclose( manager.geometry_fields[gridfile.GeometryName.EDGE_ORIENTATION_ON_VERTEX].asnumpy(), - expected.asnumpy(), + expected, ) @@ -526,11 +526,11 @@ def test_cell_normal_orientation( experiment: definitions.Experiment, backend: gtx_typing.Backend, ) -> None: - expected = grid_savepoint.edge_orientation() + expected = data_alloc.as_numpy(grid_savepoint.edge_orientation()) manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend) assert test_utils.dallclose( manager.geometry_fields[gridfile.GeometryName.CELL_NORMAL_ORIENTATION].asnumpy(), - expected.asnumpy(), + expected, ) @@ -540,12 +540,12 @@ def test_edge_vertex_distance( experiment: definitions.Experiment, backend: gtx_typing.Backend, ) -> None: - expected = grid_savepoint.edge_vert_length() + expected = data_alloc.as_numpy(grid_savepoint.edge_vert_length()) manager = utils.run_grid_manager(experiment.grid, keep_skip_values=True, backend=backend) assert test_utils.dallclose( manager.geometry_fields[gridfile.GeometryName.EDGE_VERTEX_DISTANCE].asnumpy(), - expected.asnumpy(), + expected, equal_nan=True, ) @@ -574,10 +574,10 @@ def test_decomposition_info_single_rank( grid_file = experiment.grid gm = utils.run_grid_manager(grid_file, keep_skip_values=True, backend=backend) result = gm.decomposition_info - assert np.all(data_alloc.as_numpy(result.local_index(dim)) == expected.local_index(dim)) - assert np.all(data_alloc.as_numpy(result.global_index(dim)) == expected.global_index(dim)) - assert np.all(data_alloc.as_numpy(result.owner_mask(dim)) == expected.owner_mask(dim)) - assert np.all(data_alloc.as_numpy(result.halo_levels(dim)) == expected.halo_levels(dim)) + assert (result.local_index(dim) == expected.local_index(dim)).all() + assert (result.global_index(dim) == expected.global_index(dim)).all() + assert (result.owner_mask(dim) == expected.owner_mask(dim)).all() + assert (result.halo_levels(dim) == expected.halo_levels(dim)).all() @pytest.mark.parametrize("rank", (0, 1, 2, 3), ids=lambda rank: f"rank{rank}") diff --git a/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py b/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py index c6bfb0000b..5f57daaf85 100644 --- a/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py +++ b/model/common/tests/common/interpolation/unit_tests/test_rbf_interpolation.py @@ -79,7 +79,7 @@ def test_construct_rbf_matrix_offsets_tables_for_cells( ) assert np.max(offset_table) == grid.num_edges - 1 - offset_table_savepoint = grid_savepoint.c2e2c2e() + offset_table_savepoint = data_alloc.as_numpy(grid_savepoint.c2e2c2e()) assert offset_table.shape == offset_table_savepoint.shape # Savepoint neighbors before start index may not be populated correctly, @@ -111,7 +111,7 @@ def test_construct_rbf_matrix_offsets_tables_for_edges( ) assert np.max(offset_table) == grid.num_edges - 1 - offset_table_savepoint = grid_savepoint.e2c2e() + offset_table_savepoint = data_alloc.as_numpy(grid_savepoint.e2c2e()) assert offset_table.shape == offset_table_savepoint.shape start_index = grid.start_index( @@ -141,7 +141,7 @@ def test_construct_rbf_matrix_offsets_tables_for_vertices( ) assert np.max(offset_table) == grid.num_edges - 1 - offset_table_savepoint = grid_savepoint.v2e() + offset_table_savepoint = data_alloc.as_numpy(grid_savepoint.v2e()) assert offset_table.shape == offset_table_savepoint.shape start_index = grid.start_index( diff --git a/model/common/tests/common/math/unit_tests/test_smagorinsky.py b/model/common/tests/common/math/unit_tests/test_smagorinsky.py index e43a97ab71..c8d673436a 100644 --- a/model/common/tests/common/math/unit_tests/test_smagorinsky.py +++ b/model/common/tests/common/math/unit_tests/test_smagorinsky.py @@ -24,9 +24,8 @@ def test_init_enh_smag_fac(backend_like: model_backends.BackendLike, grid: base_ a_vec = data_alloc.random_field( grid, dims.KDim, low=1.0, high=10.0, extend={dims.KDim: 1}, allocator=backend ) - xp = data_alloc.import_array_ns(backend) - fac = xp.asarray([0.67, 0.5, 1.3, 0.8]) - z = xp.asarray([0.1, 0.2, 0.3, 0.4]) + fac = np.asarray([0.67, 0.5, 1.3, 0.8]) + z = np.asarray([0.1, 0.2, 0.3, 0.4]) enhanced_smag_fac_np = enhanced_smagorinski_factor_numpy(fac, z, a_vec.asnumpy()) en_smag_fac_for_zero_nshift.with_backend(backend)( From 57d1694daa3e435f220bca08dffe386424674ce5 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 19 Mar 2026 14:21:46 +0100 Subject: [PATCH 52/68] Slightly loosen test_parallel_grid_manager.py tolerances again --- .../tests/common/grid/mpi_tests/test_parallel_grid_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index b64240a9a7..3399019422 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -228,7 +228,7 @@ def test_geometry_fields_compare_single_multi_rank( global_reference_field=field_ref.asnumpy(), local_field=field.asnumpy(), check_halos=True, - atol=0.0, + atol=1e-15, ) _log.info(f"rank = {processor_props.rank} - DONE") @@ -346,7 +346,7 @@ def test_interpolation_fields_compare_single_multi_rank( global_reference_field=field_ref.asnumpy(), local_field=field.asnumpy(), check_halos=True, - atol=3e-9 if attrs_name.startswith("rbf") else 0.0, + atol=3e-9 if attrs_name.startswith("rbf") else 1e-10 if attrs_name.startswith("pos_on_tplane") else 1e-15, ) _log.info(f"rank = {processor_props.rank} - DONE") From 61a3f45614ad26f3c3d2f2bd34eac4e37fbaf681 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 19 Mar 2026 14:33:00 +0100 Subject: [PATCH 53/68] print failures immediately in ci --- ci/distributed.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index cfb8825d63..a0797d5f90 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -84,7 +84,10 @@ build_distributed: - source ${UV_PROJECT_ENVIRONMENT}/bin/activate - echo "running with $(python --version)" script: - - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT + # TODO + # - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT + - uv pip install pytest-instafail + - pytest --instafail -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT parallel: matrix: - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] From fb209bb1ed42eacad9027384a4a70f60160e0232 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 19 Mar 2026 14:40:04 +0100 Subject: [PATCH 54/68] Fix formatting and linter warnings --- .../mpi_tests/test_mpi_decomposition.py | 12 ++++++------ .../common/decomposition/unit_tests/test_halo.py | 4 +++- .../grid/mpi_tests/test_parallel_grid_manager.py | 8 ++++++-- .../grid/mpi_tests/test_parallel_grid_refinement.py | 5 ++++- .../tests/common/math/unit_tests/test_smagorinsky.py | 4 ++-- .../src/icon4py/model/testing/parallel_helpers.py | 4 +++- model/testing/src/icon4py/model/testing/serialbox.py | 4 +++- 7 files changed, 27 insertions(+), 14 deletions(-) diff --git a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py index c2cb0d1ac5..3f66caca96 100644 --- a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py +++ b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py @@ -304,12 +304,12 @@ def test_exchange_on_dummy_data( allocator=backend, ) - halo_points = data_alloc.as_numpy(decomposition_info.local_index( - dimension, definitions.DecompositionInfo.EntryType.HALO - )) - local_points = data_alloc.as_numpy(decomposition_info.local_index( - dimension, definitions.DecompositionInfo.EntryType.OWNED - )) + halo_points = data_alloc.as_numpy( + decomposition_info.local_index(dimension, definitions.DecompositionInfo.EntryType.HALO) + ) + local_points = data_alloc.as_numpy( + decomposition_info.local_index(dimension, definitions.DecompositionInfo.EntryType.OWNED) + ) assert (input_field.ndarray == number).all() exchange.exchange(dimension, input_field, stream=definitions.BLOCK) result = input_field.asnumpy() diff --git a/model/common/tests/common/decomposition/unit_tests/test_halo.py b/model/common/tests/common/decomposition/unit_tests/test_halo.py index d37a234edc..2145fe6be1 100644 --- a/model/common/tests/common/decomposition/unit_tests/test_halo.py +++ b/model/common/tests/common/decomposition/unit_tests/test_halo.py @@ -34,7 +34,9 @@ def test_halo_constructor_owned_cells(rank, simple_neighbor_tables, backend_like allocator=allocator, ) xp = data_alloc.import_array_ns(allocator) - my_owned_cells = data_alloc.as_numpy(halo_generator.owned_cells(xp.asarray(utils.SIMPLE_DISTRIBUTION))) + my_owned_cells = data_alloc.as_numpy( + halo_generator.owned_cells(xp.asarray(utils.SIMPLE_DISTRIBUTION)) + ) print(f"rank {processor_props.rank} owns {my_owned_cells} ") assert my_owned_cells.size == len(utils._CELL_OWN[processor_props.rank]) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 3399019422..680697936d 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -346,7 +346,11 @@ def test_interpolation_fields_compare_single_multi_rank( global_reference_field=field_ref.asnumpy(), local_field=field.asnumpy(), check_halos=True, - atol=3e-9 if attrs_name.startswith("rbf") else 1e-10 if attrs_name.startswith("pos_on_tplane") else 1e-15, + atol=3e-9 + if attrs_name.startswith("rbf") + else 1e-10 + if attrs_name.startswith("pos_on_tplane") + else 1e-15, ) _log.info(f"rank = {processor_props.rank} - DONE") @@ -716,7 +720,7 @@ def test_metrics_mask_prog_halo_c( ) assert ( field[halo_indices] - == ~((c_refin_ctrl[halo_indices] >= 1) & (c_refin_ctrl[halo_indices] <= 4)) + == xp.invert((c_refin_ctrl[halo_indices] >= 1) & (c_refin_ctrl[halo_indices] <= 4)) ).all(), f"rank={processor_props.rank} - halo for MASK_PROG_HALO_C is incorrect" _log.info(f"rank = {processor_props.rank} - DONE") diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py index a953113b47..b7dae66a95 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_refinement.py @@ -81,7 +81,10 @@ def test_compute_domain_bounds( decomposition_info = grid_savepoint.construct_decomposition_info() refin_ctrl = {dim: grid_savepoint.refin_ctrl(dim) for dim in utils.main_horizontal_dims()} start_indices, end_indices = grid_refinement.compute_domain_bounds( - dim, refin_ctrl, decomposition_info, array_ns=data_alloc.import_array_ns(backend), + dim, + refin_ctrl, + decomposition_info, + array_ns=data_alloc.import_array_ns(backend), ) if ( experiment == definitions.Experiments.GAUSS3D diff --git a/model/common/tests/common/math/unit_tests/test_smagorinsky.py b/model/common/tests/common/math/unit_tests/test_smagorinsky.py index c8d673436a..4c0e27e116 100644 --- a/model/common/tests/common/math/unit_tests/test_smagorinsky.py +++ b/model/common/tests/common/math/unit_tests/test_smagorinsky.py @@ -24,8 +24,8 @@ def test_init_enh_smag_fac(backend_like: model_backends.BackendLike, grid: base_ a_vec = data_alloc.random_field( grid, dims.KDim, low=1.0, high=10.0, extend={dims.KDim: 1}, allocator=backend ) - fac = np.asarray([0.67, 0.5, 1.3, 0.8]) - z = np.asarray([0.1, 0.2, 0.3, 0.4]) + fac = (0.67, 0.5, 1.3, 0.8) + z = (0.1, 0.2, 0.3, 0.4) enhanced_smag_fac_np = enhanced_smagorinski_factor_numpy(fac, z, a_vec.asnumpy()) en_smag_fac_for_zero_nshift.with_backend(backend)( diff --git a/model/testing/src/icon4py/model/testing/parallel_helpers.py b/model/testing/src/icon4py/model/testing/parallel_helpers.py index 8d4a24782f..43a5e99caa 100644 --- a/model/testing/src/icon4py/model/testing/parallel_helpers.py +++ b/model/testing/src/icon4py/model/testing/parallel_helpers.py @@ -124,7 +124,9 @@ def check_local_global_field( gathered_sizes, gathered_field = gather_field(owned_entries, processor_props) global_index_sizes, gathered_global_indices = gather_field( - data_alloc.as_numpy(decomposition_info.global_index(dim, definitions.DecompositionInfo.EntryType.OWNED)), + data_alloc.as_numpy( + decomposition_info.global_index(dim, definitions.DecompositionInfo.EntryType.OWNED) + ), processor_props, ) diff --git a/model/testing/src/icon4py/model/testing/serialbox.py b/model/testing/src/icon4py/model/testing/serialbox.py index 151c71d1a5..bf609f41b1 100644 --- a/model/testing/src/icon4py/model/testing/serialbox.py +++ b/model/testing/src/icon4py/model/testing/serialbox.py @@ -135,7 +135,9 @@ def _read_bool(self, name: str): return self._read(name, offset=0, dtype=bool) def _read(self, name: str, offset=0, dtype=int): - return self.xp.asarray(self.xp.squeeze(self.serializer.read(name, self.savepoint) - offset).astype(dtype)) + return self.xp.asarray( + self.xp.squeeze(self.serializer.read(name, self.savepoint) - offset).astype(dtype) + ) class IconGridSavepoint(IconSavepoint): From a5c633ff13a930372a4b9765cb0848abd66fc1b3 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 20 Mar 2026 13:15:34 +0100 Subject: [PATCH 55/68] Make some field tests unit tests in test_parallel_grid_manager.py --- .../mpi_tests/test_parallel_grid_manager.py | 212 +++++++++++------- 1 file changed, 135 insertions(+), 77 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 680697936d..196cadf5fe 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -234,34 +234,7 @@ def test_geometry_fields_compare_single_multi_rank( _log.info(f"rank = {processor_props.rank} - DONE") -@pytest.mark.mpi -@pytest.mark.parametrize("processor_props", [True], indirect=True) -@pytest.mark.parametrize( - "attrs_name", - [ - interpolation_attributes.CELL_AW_VERTS, - interpolation_attributes.C_BLN_AVG, - interpolation_attributes.C_LIN_E, - interpolation_attributes.E_BLN_C_S, - interpolation_attributes.E_FLX_AVG, - interpolation_attributes.GEOFAC_DIV, - interpolation_attributes.GEOFAC_GRDIV, - interpolation_attributes.GEOFAC_GRG_X, - interpolation_attributes.GEOFAC_GRG_Y, - interpolation_attributes.GEOFAC_N2S, - interpolation_attributes.GEOFAC_ROT, - interpolation_attributes.LSQ_PSEUDOINV, - interpolation_attributes.NUDGECOEFFS_E, - interpolation_attributes.POS_ON_TPLANE_E_X, - interpolation_attributes.POS_ON_TPLANE_E_Y, - interpolation_attributes.RBF_VEC_COEFF_C1, - interpolation_attributes.RBF_VEC_COEFF_C2, - interpolation_attributes.RBF_VEC_COEFF_E, - interpolation_attributes.RBF_VEC_COEFF_V1, - interpolation_attributes.RBF_VEC_COEFF_V2, - ], -) -def test_interpolation_fields_compare_single_multi_rank( +def _compare_interpolation_fields_single_multi_rank( processor_props: decomp_defs.ProcessProperties, backend: gtx_typing.Backend | None, experiment: test_defs.Experiment, @@ -356,62 +329,65 @@ def test_interpolation_fields_compare_single_multi_rank( _log.info(f"rank = {processor_props.rank} - DONE") +@pytest.mark.level("unit") @pytest.mark.mpi @pytest.mark.parametrize("processor_props", [True], indirect=True) @pytest.mark.parametrize( "attrs_name", [ - metrics_attributes.CELL_HEIGHT_ON_HALF_LEVEL, - metrics_attributes.COEFF1_DWDZ, - metrics_attributes.COEFF2_DWDZ, - metrics_attributes.COEFF_GRADEKIN, - metrics_attributes.D2DEXDZ2_FAC1_MC, - metrics_attributes.D2DEXDZ2_FAC2_MC, - metrics_attributes.DDQZ_Z_FULL, - metrics_attributes.DDQZ_Z_FULL_E, - metrics_attributes.DDQZ_Z_HALF, - metrics_attributes.DDXN_Z_FULL, - metrics_attributes.DDXN_Z_HALF_E, - metrics_attributes.DDXT_Z_FULL, - metrics_attributes.DDXT_Z_HALF_E, - metrics_attributes.D_EXNER_DZ_REF_IC, - metrics_attributes.EXNER_EXFAC, - metrics_attributes.EXNER_REF_MC, - metrics_attributes.EXNER_W_EXPLICIT_WEIGHT_PARAMETER, - metrics_attributes.EXNER_W_IMPLICIT_WEIGHT_PARAMETER, - metrics_attributes.FLAT_IDX_MAX, - metrics_attributes.HORIZONTAL_MASK_FOR_3D_DIVDAMP, - metrics_attributes.INV_DDQZ_Z_FULL, - metrics_attributes.MAXHGTD, - metrics_attributes.MAXHGTD_AVG, - metrics_attributes.MAXSLP, - metrics_attributes.MAXSLP_AVG, - metrics_attributes.MAX_NBHGT, - metrics_attributes.NFLAT_GRADP, - metrics_attributes.PG_EXDIST_DSL, - metrics_attributes.RAYLEIGH_W, - metrics_attributes.RHO_REF_MC, - metrics_attributes.RHO_REF_ME, - metrics_attributes.SCALING_FACTOR_FOR_3D_DIVDAMP, - metrics_attributes.DEEPATMO_DIVH, - metrics_attributes.DEEPATMO_DIVZL, - metrics_attributes.DEEPATMO_DIVZU, - metrics_attributes.THETA_REF_IC, - metrics_attributes.THETA_REF_MC, - metrics_attributes.THETA_REF_ME, - metrics_attributes.VERTOFFSET_GRADP, - metrics_attributes.WGTFACQ_C, - metrics_attributes.WGTFACQ_E, - metrics_attributes.WGTFAC_C, - metrics_attributes.WGTFAC_E, - metrics_attributes.ZDIFF_GRADP, - metrics_attributes.ZD_DIFFCOEF, - metrics_attributes.ZD_INTCOEF, - metrics_attributes.ZD_VERTOFFSET, - metrics_attributes.Z_MC, + interpolation_attributes.CELL_AW_VERTS, + interpolation_attributes.C_BLN_AVG, + interpolation_attributes.C_LIN_E, + interpolation_attributes.E_BLN_C_S, + interpolation_attributes.GEOFAC_DIV, + interpolation_attributes.GEOFAC_ROT, + interpolation_attributes.LSQ_PSEUDOINV, + interpolation_attributes.NUDGECOEFFS_E, + interpolation_attributes.POS_ON_TPLANE_E_X, + interpolation_attributes.POS_ON_TPLANE_E_Y, + ], +) +def test_interpolation_fields_compare_single_multi_rank_unit( + processor_props: decomp_defs.ProcessProperties, + backend: gtx_typing.Backend | None, + experiment: test_defs.Experiment, + attrs_name: str, +) -> None: + _compare_interpolation_fields_single_multi_rank( + processor_props, backend, experiment, attrs_name + ) + + +@pytest.mark.level("integration") +@pytest.mark.mpi +@pytest.mark.parametrize("processor_props", [True], indirect=True) +@pytest.mark.parametrize( + "attrs_name", + [ + interpolation_attributes.E_FLX_AVG, + interpolation_attributes.GEOFAC_GRDIV, + interpolation_attributes.GEOFAC_GRG_X, + interpolation_attributes.GEOFAC_GRG_Y, + interpolation_attributes.GEOFAC_N2S, + interpolation_attributes.RBF_VEC_COEFF_C1, + interpolation_attributes.RBF_VEC_COEFF_C2, + interpolation_attributes.RBF_VEC_COEFF_E, + interpolation_attributes.RBF_VEC_COEFF_V1, + interpolation_attributes.RBF_VEC_COEFF_V2, ], ) -def test_metrics_fields_compare_single_multi_rank( +def test_interpolation_fields_compare_single_multi_rank_integration( + processor_props: decomp_defs.ProcessProperties, + backend: gtx_typing.Backend | None, + experiment: test_defs.Experiment, + attrs_name: str, +) -> None: + _compare_interpolation_fields_single_multi_rank( + processor_props, backend, experiment, attrs_name + ) + + +def _compare_metrics_fields_single_multi_rank( processor_props: decomp_defs.ProcessProperties, backend: gtx_typing.Backend | None, experiment: test_defs.Experiment, @@ -590,6 +566,88 @@ def test_metrics_fields_compare_single_multi_rank( _log.info(f"rank = {processor_props.rank} - DONE") +@pytest.mark.level("unit") +@pytest.mark.mpi +@pytest.mark.parametrize("processor_props", [True], indirect=True) +@pytest.mark.parametrize( + "attrs_name", + [ + metrics_attributes.CELL_HEIGHT_ON_HALF_LEVEL, + metrics_attributes.COEFF_GRADEKIN, + metrics_attributes.DDQZ_Z_FULL, + metrics_attributes.DDXN_Z_HALF_E, + metrics_attributes.DDXT_Z_HALF_E, + metrics_attributes.D_EXNER_DZ_REF_IC, + metrics_attributes.EXNER_REF_MC, + metrics_attributes.EXNER_W_IMPLICIT_WEIGHT_PARAMETER, + metrics_attributes.FLAT_IDX_MAX, + metrics_attributes.HORIZONTAL_MASK_FOR_3D_DIVDAMP, + metrics_attributes.INV_DDQZ_Z_FULL, + metrics_attributes.MAXHGTD, + metrics_attributes.MAXSLP, + metrics_attributes.MAX_NBHGT, + metrics_attributes.PG_EXDIST_DSL, + metrics_attributes.RAYLEIGH_W, + metrics_attributes.RHO_REF_MC, + metrics_attributes.RHO_REF_ME, + metrics_attributes.SCALING_FACTOR_FOR_3D_DIVDAMP, + metrics_attributes.DEEPATMO_DIVH, + metrics_attributes.DEEPATMO_DIVZL, + metrics_attributes.DEEPATMO_DIVZU, + metrics_attributes.THETA_REF_IC, + metrics_attributes.THETA_REF_MC, + metrics_attributes.THETA_REF_ME, + metrics_attributes.VERTOFFSET_GRADP, + metrics_attributes.WGTFACQ_C, + metrics_attributes.WGTFAC_C, + metrics_attributes.ZDIFF_GRADP, + metrics_attributes.ZD_DIFFCOEF, + metrics_attributes.Z_MC, + ], +) +def test_metrics_fields_compare_single_multi_rank_unit( + processor_props: decomp_defs.ProcessProperties, + backend: gtx_typing.Backend | None, + experiment: test_defs.Experiment, + attrs_name: str, +) -> None: + _compare_metrics_fields_single_multi_rank(processor_props, backend, experiment, attrs_name) + + +@pytest.mark.level("integration") +@pytest.mark.mpi +@pytest.mark.parametrize("processor_props", [True], indirect=True) +@pytest.mark.parametrize( + "attrs_name", + [ + metrics_attributes.COEFF1_DWDZ, + metrics_attributes.COEFF2_DWDZ, + metrics_attributes.D2DEXDZ2_FAC1_MC, + metrics_attributes.D2DEXDZ2_FAC2_MC, + metrics_attributes.DDQZ_Z_FULL_E, + metrics_attributes.DDQZ_Z_HALF, + metrics_attributes.DDXN_Z_FULL, + metrics_attributes.DDXT_Z_FULL, + metrics_attributes.EXNER_EXFAC, + metrics_attributes.EXNER_W_EXPLICIT_WEIGHT_PARAMETER, + metrics_attributes.MAXHGTD_AVG, + metrics_attributes.MAXSLP_AVG, + metrics_attributes.NFLAT_GRADP, + metrics_attributes.WGTFACQ_E, + metrics_attributes.WGTFAC_E, + metrics_attributes.ZD_INTCOEF, + metrics_attributes.ZD_VERTOFFSET, + ], +) +def test_metrics_fields_compare_single_multi_rank_integration( + processor_props: decomp_defs.ProcessProperties, + backend: gtx_typing.Backend | None, + experiment: test_defs.Experiment, + attrs_name: str, +) -> None: + _compare_metrics_fields_single_multi_rank(processor_props, backend, experiment, attrs_name) + + # MASK_PROG_HALO_C is defined specially only on halos, so we have a separate # test for it. It doesn't make sense to compare to a single-rank reference since # it has no halos. From 20c0ea1166ef9f20a631b7ebfb8b442fbaf2fc77 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 20 Mar 2026 15:30:50 +0100 Subject: [PATCH 56/68] Don't test r01b01 grid anymore --- model/testing/src/icon4py/model/testing/fixtures/datatest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/model/testing/src/icon4py/model/testing/fixtures/datatest.py b/model/testing/src/icon4py/model/testing/fixtures/datatest.py index 7730656d5a..8b07dd27b9 100644 --- a/model/testing/src/icon4py/model/testing/fixtures/datatest.py +++ b/model/testing/src/icon4py/model/testing/fixtures/datatest.py @@ -81,7 +81,6 @@ def cpu_allocator() -> gtx_typing.Allocator: @pytest.fixture( params=[ - definitions.Grids.R01B01_GLOBAL, definitions.Grids.R02B04_GLOBAL, definitions.Grids.MCH_CH_R04B09_DSL, definitions.Grids.TORUS_50000x5000, From 7cccfef7757f64603e9fe7d045556bbc7d5aea59 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 20 Mar 2026 16:16:05 +0100 Subject: [PATCH 57/68] Split geometry fields test in test_parallel_grid_manager.py into unit and integration test --- .../mpi_tests/test_parallel_grid_manager.py | 165 +++++++++++------- 1 file changed, 98 insertions(+), 67 deletions(-) diff --git a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py index 196cadf5fe..c062594487 100644 --- a/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py +++ b/model/common/tests/common/grid/mpi_tests/test_parallel_grid_manager.py @@ -102,62 +102,7 @@ def _get_neighbor_tables(grid: base.Grid) -> dict: } -@pytest.mark.mpi -@pytest.mark.parametrize("processor_props", [True], indirect=True) -@pytest.mark.parametrize( - "attrs_name", - [ - geometry_attributes.CELL_AREA, - geometry_attributes.CELL_CENTER_X, - geometry_attributes.CELL_CENTER_Y, - geometry_attributes.CELL_CENTER_Z, - geometry_attributes.CELL_LAT, - geometry_attributes.CELL_LON, - geometry_attributes.CELL_NORMAL_ORIENTATION, - geometry_attributes.CORIOLIS_PARAMETER, - geometry_attributes.DUAL_AREA, - geometry_attributes.DUAL_EDGE_LENGTH, - f"inverse_of_{geometry_attributes.DUAL_EDGE_LENGTH}", - geometry_attributes.EDGE_AREA, - geometry_attributes.EDGE_CELL_DISTANCE, - geometry_attributes.EDGE_CENTER_X, - geometry_attributes.EDGE_CENTER_Y, - geometry_attributes.EDGE_CENTER_Z, - geometry_attributes.EDGE_DUAL_U, - geometry_attributes.EDGE_DUAL_V, - geometry_attributes.EDGE_LAT, - f"inverse_of_{geometry_attributes.EDGE_LENGTH}", - geometry_attributes.EDGE_LENGTH, - geometry_attributes.EDGE_LON, - geometry_attributes.EDGE_NORMAL_CELL_U, - geometry_attributes.EDGE_NORMAL_CELL_V, - geometry_attributes.EDGE_NORMAL_U, - geometry_attributes.EDGE_NORMAL_V, - geometry_attributes.EDGE_NORMAL_VERTEX_U, - geometry_attributes.EDGE_NORMAL_VERTEX_V, - geometry_attributes.EDGE_NORMAL_X, - geometry_attributes.EDGE_NORMAL_Y, - geometry_attributes.EDGE_NORMAL_Z, - geometry_attributes.EDGE_TANGENT_CELL_U, - geometry_attributes.EDGE_TANGENT_CELL_V, - geometry_attributes.EDGE_TANGENT_VERTEX_U, - geometry_attributes.EDGE_TANGENT_VERTEX_V, - geometry_attributes.EDGE_TANGENT_X, - geometry_attributes.EDGE_TANGENT_Y, - geometry_attributes.EDGE_TANGENT_Z, - geometry_attributes.EDGE_VERTEX_DISTANCE, - geometry_attributes.TANGENT_ORIENTATION, - geometry_attributes.VERTEX_EDGE_ORIENTATION, - geometry_attributes.VERTEX_LAT, - geometry_attributes.VERTEX_LON, - geometry_attributes.VERTEX_VERTEX_LENGTH, - f"inverse_of_{geometry_attributes.VERTEX_VERTEX_LENGTH}", - geometry_attributes.VERTEX_X, - geometry_attributes.VERTEX_Y, - geometry_attributes.VERTEX_Z, - ], -) -def test_geometry_fields_compare_single_multi_rank( +def _compare_geometry_fields_single_multi_rank( processor_props: decomp_defs.ProcessProperties, backend: gtx_typing.Backend | None, grid_description: test_defs.GridDescription, @@ -234,6 +179,92 @@ def test_geometry_fields_compare_single_multi_rank( _log.info(f"rank = {processor_props.rank} - DONE") +@pytest.mark.level("unit") +@pytest.mark.mpi +@pytest.mark.parametrize("processor_props", [True], indirect=True) +@pytest.mark.parametrize( + "attrs_name", + [ + geometry_attributes.CELL_CENTER_Y, + geometry_attributes.CELL_CENTER_Z, + geometry_attributes.CELL_LON, + geometry_attributes.DUAL_EDGE_LENGTH, + geometry_attributes.EDGE_CENTER_Y, + geometry_attributes.EDGE_CENTER_Z, + geometry_attributes.EDGE_DUAL_V, + geometry_attributes.EDGE_LENGTH, + geometry_attributes.EDGE_LON, + geometry_attributes.EDGE_NORMAL_CELL_V, + geometry_attributes.EDGE_NORMAL_V, + geometry_attributes.EDGE_NORMAL_VERTEX_V, + geometry_attributes.EDGE_NORMAL_Y, + geometry_attributes.EDGE_NORMAL_Z, + geometry_attributes.EDGE_TANGENT_CELL_V, + geometry_attributes.EDGE_TANGENT_VERTEX_V, + geometry_attributes.EDGE_TANGENT_Y, + geometry_attributes.EDGE_TANGENT_Z, + geometry_attributes.VERTEX_LON, + geometry_attributes.VERTEX_VERTEX_LENGTH, + geometry_attributes.VERTEX_Y, + geometry_attributes.VERTEX_Z, + ], +) +def test_geometry_fields_compare_single_multi_rank_unit( + processor_props: decomp_defs.ProcessProperties, + backend: gtx_typing.Backend | None, + grid_description: test_defs.GridDescription, + attrs_name: str, +) -> None: + _compare_geometry_fields_single_multi_rank( + processor_props, backend, grid_description, attrs_name + ) + + +@pytest.mark.level("integration") +@pytest.mark.mpi +@pytest.mark.parametrize("processor_props", [True], indirect=True) +@pytest.mark.parametrize( + "attrs_name", + [ + geometry_attributes.CELL_AREA, + geometry_attributes.CELL_CENTER_X, + geometry_attributes.CELL_LAT, + geometry_attributes.CELL_NORMAL_ORIENTATION, + geometry_attributes.CORIOLIS_PARAMETER, + geometry_attributes.DUAL_AREA, + f"inverse_of_{geometry_attributes.DUAL_EDGE_LENGTH}", + geometry_attributes.EDGE_AREA, + geometry_attributes.EDGE_CELL_DISTANCE, + geometry_attributes.EDGE_CENTER_X, + geometry_attributes.EDGE_DUAL_U, + geometry_attributes.EDGE_LAT, + f"inverse_of_{geometry_attributes.EDGE_LENGTH}", + geometry_attributes.EDGE_NORMAL_CELL_U, + geometry_attributes.EDGE_NORMAL_U, + geometry_attributes.EDGE_NORMAL_VERTEX_U, + geometry_attributes.EDGE_NORMAL_X, + geometry_attributes.EDGE_TANGENT_CELL_U, + geometry_attributes.EDGE_TANGENT_VERTEX_U, + geometry_attributes.EDGE_TANGENT_X, + geometry_attributes.EDGE_VERTEX_DISTANCE, + geometry_attributes.TANGENT_ORIENTATION, + geometry_attributes.VERTEX_EDGE_ORIENTATION, + geometry_attributes.VERTEX_LAT, + f"inverse_of_{geometry_attributes.VERTEX_VERTEX_LENGTH}", + geometry_attributes.VERTEX_X, + ], +) +def test_geometry_fields_compare_single_multi_rank_integration( + processor_props: decomp_defs.ProcessProperties, + backend: gtx_typing.Backend | None, + grid_description: test_defs.GridDescription, + attrs_name: str, +) -> None: + _compare_geometry_fields_single_multi_rank( + processor_props, backend, grid_description, attrs_name + ) + + def _compare_interpolation_fields_single_multi_rank( processor_props: decomp_defs.ProcessProperties, backend: gtx_typing.Backend | None, @@ -340,11 +371,14 @@ def _compare_interpolation_fields_single_multi_rank( interpolation_attributes.C_LIN_E, interpolation_attributes.E_BLN_C_S, interpolation_attributes.GEOFAC_DIV, + interpolation_attributes.GEOFAC_GRG_Y, interpolation_attributes.GEOFAC_ROT, interpolation_attributes.LSQ_PSEUDOINV, interpolation_attributes.NUDGECOEFFS_E, interpolation_attributes.POS_ON_TPLANE_E_X, interpolation_attributes.POS_ON_TPLANE_E_Y, + interpolation_attributes.RBF_VEC_COEFF_C2, + interpolation_attributes.RBF_VEC_COEFF_V2, ], ) def test_interpolation_fields_compare_single_multi_rank_unit( @@ -367,13 +401,10 @@ def test_interpolation_fields_compare_single_multi_rank_unit( interpolation_attributes.E_FLX_AVG, interpolation_attributes.GEOFAC_GRDIV, interpolation_attributes.GEOFAC_GRG_X, - interpolation_attributes.GEOFAC_GRG_Y, interpolation_attributes.GEOFAC_N2S, interpolation_attributes.RBF_VEC_COEFF_C1, - interpolation_attributes.RBF_VEC_COEFF_C2, interpolation_attributes.RBF_VEC_COEFF_E, interpolation_attributes.RBF_VEC_COEFF_V1, - interpolation_attributes.RBF_VEC_COEFF_V2, ], ) def test_interpolation_fields_compare_single_multi_rank_integration( @@ -573,10 +604,16 @@ def _compare_metrics_fields_single_multi_rank( "attrs_name", [ metrics_attributes.CELL_HEIGHT_ON_HALF_LEVEL, + metrics_attributes.COEFF2_DWDZ, metrics_attributes.COEFF_GRADEKIN, + metrics_attributes.D2DEXDZ2_FAC2_MC, metrics_attributes.DDQZ_Z_FULL, metrics_attributes.DDXN_Z_HALF_E, + metrics_attributes.DDXT_Z_FULL, metrics_attributes.DDXT_Z_HALF_E, + metrics_attributes.DEEPATMO_DIVH, + metrics_attributes.DEEPATMO_DIVZL, + metrics_attributes.DEEPATMO_DIVZU, metrics_attributes.D_EXNER_DZ_REF_IC, metrics_attributes.EXNER_REF_MC, metrics_attributes.EXNER_W_IMPLICIT_WEIGHT_PARAMETER, @@ -585,15 +622,13 @@ def _compare_metrics_fields_single_multi_rank( metrics_attributes.INV_DDQZ_Z_FULL, metrics_attributes.MAXHGTD, metrics_attributes.MAXSLP, + metrics_attributes.MAXSLP_AVG, metrics_attributes.MAX_NBHGT, metrics_attributes.PG_EXDIST_DSL, metrics_attributes.RAYLEIGH_W, metrics_attributes.RHO_REF_MC, metrics_attributes.RHO_REF_ME, metrics_attributes.SCALING_FACTOR_FOR_3D_DIVDAMP, - metrics_attributes.DEEPATMO_DIVH, - metrics_attributes.DEEPATMO_DIVZL, - metrics_attributes.DEEPATMO_DIVZU, metrics_attributes.THETA_REF_IC, metrics_attributes.THETA_REF_MC, metrics_attributes.THETA_REF_ME, @@ -602,6 +637,7 @@ def _compare_metrics_fields_single_multi_rank( metrics_attributes.WGTFAC_C, metrics_attributes.ZDIFF_GRADP, metrics_attributes.ZD_DIFFCOEF, + metrics_attributes.ZD_VERTOFFSET, metrics_attributes.Z_MC, ], ) @@ -621,22 +657,17 @@ def test_metrics_fields_compare_single_multi_rank_unit( "attrs_name", [ metrics_attributes.COEFF1_DWDZ, - metrics_attributes.COEFF2_DWDZ, metrics_attributes.D2DEXDZ2_FAC1_MC, - metrics_attributes.D2DEXDZ2_FAC2_MC, metrics_attributes.DDQZ_Z_FULL_E, metrics_attributes.DDQZ_Z_HALF, metrics_attributes.DDXN_Z_FULL, - metrics_attributes.DDXT_Z_FULL, metrics_attributes.EXNER_EXFAC, metrics_attributes.EXNER_W_EXPLICIT_WEIGHT_PARAMETER, metrics_attributes.MAXHGTD_AVG, - metrics_attributes.MAXSLP_AVG, metrics_attributes.NFLAT_GRADP, metrics_attributes.WGTFACQ_E, metrics_attributes.WGTFAC_E, metrics_attributes.ZD_INTCOEF, - metrics_attributes.ZD_VERTOFFSET, ], ) def test_metrics_fields_compare_single_multi_rank_integration( From 0b063cc7ccda522275ae1fada1e04a62bd09a6f9 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 20 Mar 2026 19:53:45 +0100 Subject: [PATCH 58/68] Only run integration tests in distributed CI --- ci/distributed.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index a0797d5f90..6548091301 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -87,11 +87,12 @@ build_distributed: # TODO # - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT - uv pip install pytest-instafail - - pytest --instafail -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT + - pytest --instafail -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT --level=$LEVEL parallel: matrix: - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu] + LEVEL: [integration] rules: - if: $COMPONENT == 'atmosphere/diffusion' variables: From eb01d9af847232d6bbed35c07af1110d2bd39490 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 23 Mar 2026 09:59:53 +0100 Subject: [PATCH 59/68] Apply suggestion from @msimberg --- ci/distributed.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 6548091301..e027aaa234 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -84,10 +84,7 @@ build_distributed: - source ${UV_PROJECT_ENVIRONMENT}/bin/activate - echo "running with $(python --version)" script: - # TODO - # - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT - - uv pip install pytest-instafail - - pytest --instafail -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT --level=$LEVEL + - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT --level=$LEVEL parallel: matrix: - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] From fd93fb87844d2cffd640d2653aa29a13817c9ac2 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 23 Mar 2026 11:40:07 +0100 Subject: [PATCH 60/68] Test only dace_gpu/common in distributed pipeline --- ci/distributed.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index e027aaa234..a9f261c29d 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -87,8 +87,10 @@ build_distributed: - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT --level=$LEVEL parallel: matrix: - - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] - BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu] + # - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] + - COMPONENT: [common] + # BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu] + BACKEND: [dace_gpu] LEVEL: [integration] rules: - if: $COMPONENT == 'atmosphere/diffusion' From 702d0bdb29d7188a6e4059e55697608757f98cc6 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 23 Mar 2026 13:38:09 +0100 Subject: [PATCH 61/68] Try persistent cache and more workers on distributed CI pipeline --- ci/distributed.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/distributed.yml b/ci/distributed.yml index a9f261c29d..1a56ab526c 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -62,6 +62,8 @@ build_distributed: ICON4PY_TEST_DATA_PATH: "/icon4py/testdata" ICON4PY_ENABLE_GRID_DOWNLOAD: false ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false + GT4PY_BUILD_JOBS: 32 + GT4PY_BUILD_CACHE_LIFETIME: "persistent" PYTEST_ADDOPTS: "--durations=0" CSCS_ADDITIONAL_MOUNTS: '["/capstor/store/cscs/userlab/cwci02/icon4py/ci/testdata:$ICON4PY_TEST_DATA_PATH"]' # Do not use libfabric from the host system. Libfabric with slingshot From a7f60f08270118d686d93521284a6518a7331ba1 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 24 Mar 2026 16:30:22 +0100 Subject: [PATCH 62/68] Test only gtfn_gpu --- ci/distributed.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 1a56ab526c..b039cde5ca 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -92,7 +92,8 @@ build_distributed: # - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] - COMPONENT: [common] # BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu] - BACKEND: [dace_gpu] + # BACKEND: [dace_gpu] + BACKEND: [gtfn_gpu] LEVEL: [integration] rules: - if: $COMPONENT == 'atmosphere/diffusion' From 5652ce8458a46d7bfe4d8d6ea9cec33bbfb29a1f Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 09:48:49 +0100 Subject: [PATCH 63/68] Update distributed config --- ci/distributed.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index b039cde5ca..d3ecfef697 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -89,11 +89,10 @@ build_distributed: - ci/scripts/ci-mpi-wrapper.sh pytest -sv -k mpi_tests --with-mpi --backend=$BACKEND model/$COMPONENT --level=$LEVEL parallel: matrix: - # - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] - - COMPONENT: [common] - # BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu] - # BACKEND: [dace_gpu] - BACKEND: [gtfn_gpu] + - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] + # TODO(msimberg): Enable dace_gpu when compilation doesn't take as long + # or when we can cache across CI jobs. + BACKEND: [embedded, gtfn_cpu, dace_cpu, gtfn_gpu] LEVEL: [integration] rules: - if: $COMPONENT == 'atmosphere/diffusion' @@ -104,7 +103,7 @@ build_distributed: SLURM_TIMELIMIT: '00:30:00' - if: $COMPONENT == 'common' && ($BACKEND == 'dace_gpu' || $BACKEND == 'gtfn_gpu') variables: - # TODO(msimberg): This is very long, can we do better? + # TODO(msimberg): Decrease this when enabling dace_gpu above, if possible. SLURM_TIMELIMIT: '03:00:00' - if: $COMPONENT == 'atmosphere/dycore' variables: From 5ba050c0798136cfe6344d6fd887b6e4e1200be2 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 14:28:55 +0100 Subject: [PATCH 64/68] Upgrade mpi4py --- uv.lock | 58 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/uv.lock b/uv.lock index 8bae6ba865..0c21145096 100644 --- a/uv.lock +++ b/uv.lock @@ -2523,15 +2523,55 @@ wheels = [ [[package]] name = "mpi4py" -version = "4.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/08/34/8499a92a387d24d0092c38089f8195f13c5c76f0f814126af3fe363e5636/mpi4py-4.0.1.tar.gz", hash = "sha256:f3174b245775d556f4fddb32519a2066ef0592edc810c5b5a59238f9a0a40c89", size = 466179, upload-time = "2024-10-11T10:59:53.425Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/22/15/7d2fd2ca8b1ae362371b2bb9b2f787f9166b6ecd536e0e773dce6b98a5a9/mpi4py-4.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:600f26cae7f390b4ec525f5c1ccc374686c37a8c07f9c21320866c0a323f6dae", size = 1588594, upload-time = "2024-10-12T07:10:26.736Z" }, - { url = "https://files.pythonhosted.org/packages/f4/f7/6dfdee53f9806361ab75cb83ee5feab06a738f7f6a42715c79d72a783d31/mpi4py-4.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:0cb209fcdc7fee0346d12edff1cfd1c1ffca1b807c53631ba0436b9c2bcf8229", size = 1599377, upload-time = "2024-10-12T07:10:30.836Z" }, - { url = "https://files.pythonhosted.org/packages/35/28/7e5eae1a9940f48c41e208e9e6fdb56e497095030ab53e2d9ce702705cbb/mpi4py-4.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:b704e7db92b1ac94b96802e17cf28082455daa928e8e51398ad9f5e5eb8c9b7b", size = 1727556, upload-time = "2024-10-12T07:10:36.005Z" }, - { url = "https://files.pythonhosted.org/packages/95/70/cc361869a2920476ecc5f29c98e0130aaf2e177a0087cb7ebbafb90414f1/mpi4py-4.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:52a7b1760b1aeb41a0ea38969314b2b170117a0ded2f689915f1cb89aaaf8a6f", size = 1726170, upload-time = "2024-10-12T07:10:39.15Z" }, - { url = "https://files.pythonhosted.org/packages/17/23/81aed5da44f9d743f1e76909fd04ae5dc122ff7c9f97fa0b40b8f752245c/mpi4py-4.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:93f45dcc2fd5f3396f961b1bc8f0fb9d5db786fdc0d72e4f8611f47718b5dac8", size = 1584997, upload-time = "2024-10-12T07:10:52.704Z" }, +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/62/74/28ea85b0b949cad827ea50720e00e814e88c8fd536c27c3c491e4f025724/mpi4py-4.1.1.tar.gz", hash = "sha256:eb2c8489bdbc47fdc6b26ca7576e927a11b070b6de196a443132766b3d0a2a22", size = 500518, upload-time = "2025-10-10T13:55:20.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/b3/2e7df40608f2188dca16e38f8030add1071f06b1cd94dd8a4e16b9acbd84/mpi4py-4.1.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:1586f5d1557abed9cba7e984d18f32e787b353be0986e599974db177ae36329a", size = 1422849, upload-time = "2025-10-10T13:53:40.082Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ed/970bd3edc0e614eccc726fa406255b88f728a8bc059e81f96f28d6ede0af/mpi4py-4.1.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:ba85e4778d63c750226de95115c92b709f38d7e661be660a275da4f0992ee197", size = 1326982, upload-time = "2025-10-10T13:53:42.32Z" }, + { url = "https://files.pythonhosted.org/packages/5d/c3/f9a5d1f9ba52ac6386bf3d3550027f42a6b102b0432113cc43294420feb2/mpi4py-4.1.1-cp310-abi3-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0a8332884626994d9ef48da233dc7a0355f4868dd7ff59f078d5813a2935b930", size = 1373127, upload-time = "2025-10-10T13:53:43.957Z" }, + { url = "https://files.pythonhosted.org/packages/84/d1/1fe75025df801d817ed49371c719559f742f3f263323442d34dbe3366af3/mpi4py-4.1.1-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6e0352860f0b3e18bc0dcb47e42e583ccb9472f89752d711a6fca46a38670554", size = 1225134, upload-time = "2025-10-10T13:53:45.583Z" }, + { url = "https://files.pythonhosted.org/packages/40/44/d653fec0e4ca8181645da4bfb2763017625e5b3f151b208fadd932cb1766/mpi4py-4.1.1-cp310-abi3-win_amd64.whl", hash = "sha256:0f46dfe666a599e4bd2641116b2b4852a3ed9d37915edf98fae471d666663128", size = 1478863, upload-time = "2025-10-10T13:53:47.178Z" }, + { url = "https://files.pythonhosted.org/packages/58/f7/793c9a532e5367cffb2b97ca6a879285ca73a14f79e6ff208bb390651a43/mpi4py-4.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9082e04c8afcffa7d650a262d800af1a617c555d610810deeab265a4a5f7d42e", size = 1585904, upload-time = "2025-10-10T13:53:49.129Z" }, + { url = "https://files.pythonhosted.org/packages/b7/fe/cdead6721426b25d817a1bf45d5adc6dc90fd8bb0831f5ca06a4edd2015c/mpi4py-4.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d618e6a5a8f6f86c33a954356d8ed398bec31f34b63321570661ac157063bb6", size = 1438343, upload-time = "2025-10-10T13:53:51.098Z" }, + { url = "https://files.pythonhosted.org/packages/c0/c4/4a73c80cf483df603770278f0fdc57da5394edee376790c62f1eba04bb3b/mpi4py-4.1.1-cp310-cp310-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d4c460609bd6decc22ad89cbfe48e4c5a2461ff52ada9345a4c19edee39f93da", size = 1432321, upload-time = "2025-10-10T13:53:53.235Z" }, + { url = "https://files.pythonhosted.org/packages/49/56/7b32631f3cc5cf741610a108a7f40a3714c9862c1f637b5ded525af32be9/mpi4py-4.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c04a388c7a945e751c82742c6bb277434d26a67768a01952f7494d1c25dff94b", size = 1299883, upload-time = "2025-10-10T13:53:55.22Z" }, + { url = "https://files.pythonhosted.org/packages/14/76/53caf807ec74c042fbecf76162e071c09c53fb0ed66b1edf31dabd64c588/mpi4py-4.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:1ad4b225a5a1a02a2b89979ed8f328c6a2bc3bd6ad4a57e453727f90373fa5f8", size = 1622884, upload-time = "2025-10-10T13:53:56.882Z" }, + { url = "https://files.pythonhosted.org/packages/20/8f/5d28174048ef02fb91dd0759a32c07b272c9f1df265e19145712aa7bd712/mpi4py-4.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a428ba96b992a8911cf932fa71dd8c0260d47ab7e5dee2b09239ad91fc540b79", size = 1596913, upload-time = "2025-10-10T13:53:58.466Z" }, + { url = "https://files.pythonhosted.org/packages/ab/81/dce928b11816fac9713e93e609476ddac520fc50368aa7591728c329ff19/mpi4py-4.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fc0cf81445fac2ae2e5716c365fd72e1bb545df065f5a3f6731f64b3beed886e", size = 1433274, upload-time = "2025-10-10T13:54:00.508Z" }, + { url = "https://files.pythonhosted.org/packages/5d/15/1a869a35d3e3438866dc8d8c9cb04dc6aa484171343627a8baf82c3c1ca9/mpi4py-4.1.1-cp311-cp311-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a753d5d61b46f90260247f344a6c57c527a6a4e7bea126830120ab41c3d057e5", size = 1423333, upload-time = "2025-10-10T13:54:03.679Z" }, + { url = "https://files.pythonhosted.org/packages/25/33/072781fb85f5bc50b93ee7e8d3b3afb849d50570431b6cb2aa957db79b59/mpi4py-4.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4a36ef9d7b2b6b62026dbf9b59b44efb5430f7b9ca5fb855bfbf8d403218e37c", size = 1299183, upload-time = "2025-10-10T13:54:05.3Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a7/152af3c6412702a4e0fcfd0fe572307ed52821de13db9c96535f31a39aa7/mpi4py-4.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:20bf4c0c65fd67287664f8b1b6dc7c7b341838f10bba34a2e452d47530ce8a5f", size = 1632284, upload-time = "2025-10-10T13:54:06.786Z" }, + { url = "https://files.pythonhosted.org/packages/ff/2c/e201cd4828555f10306a5439875cbd0ecfba766ace01ff5c6df43f795650/mpi4py-4.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4403a7cec985be9963efc626193e6df3f63f5ada0c26373c28e640e623e56c3", size = 1669517, upload-time = "2025-10-10T13:54:08.404Z" }, + { url = "https://files.pythonhosted.org/packages/7b/53/18d978c3a19deecf38217ce54319e6c9162fec3569c4256c039b66eac2f4/mpi4py-4.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a2ffccc9f3a8c7c957403faad594d650c60234ac08cbedf45beaa96602debe9", size = 1454721, upload-time = "2025-10-10T13:54:09.977Z" }, + { url = "https://files.pythonhosted.org/packages/ee/15/b908d1d23a4bd2bd7b2e98de5df23b26e43145119fe294728bf89211b935/mpi4py-4.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ed3d9b619bf197a290f7fd67eb61b1c2a5c204afd9621651a50dc0b1c1280d45", size = 1448977, upload-time = "2025-10-10T13:54:11.65Z" }, + { url = "https://files.pythonhosted.org/packages/5d/19/088a2d37e80e0feb7851853b2a71cbe6f9b18bdf0eab680977864ea83aab/mpi4py-4.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0699c194db5d95fc2085711e4e0013083bd7ae9a88438e1fd64ddb67e9b0cf9e", size = 1318737, upload-time = "2025-10-10T13:54:13.075Z" }, + { url = "https://files.pythonhosted.org/packages/97/3a/526261f39bf096e5ff396d18b76740a58d872425612ff84113dd85c2c08e/mpi4py-4.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:0abf5490c3d49c30542b461bfc5ad88dd7d147a4bdb456b7163640577fdfef88", size = 1725676, upload-time = "2025-10-10T13:54:14.681Z" }, + { url = "https://files.pythonhosted.org/packages/30/75/2ffccd69360680a0216e71f90fd50dc8ff49711be54502d522a068196c68/mpi4py-4.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3dd973c509f2dbb6904c035a4a071509cde98decf0528fa21e2e7d5db5cc988", size = 1710002, upload-time = "2025-10-10T13:54:17.042Z" }, + { url = "https://files.pythonhosted.org/packages/3c/13/22fa9dcbc5e4ae6fd10cba6d49b7c879c30c5bea88f450f79b373d200f40/mpi4py-4.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c8c83a359e62dd7fdd030360f430e0e8986df029c0953ab216ff97a110038dc4", size = 1484623, upload-time = "2025-10-10T13:54:19.097Z" }, + { url = "https://files.pythonhosted.org/packages/47/01/476f0f9dc96261d02214009f42e10338fc56f260f1f10b23ee89c515c8b7/mpi4py-4.1.1-cp313-cp313-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:323ba354ba951c7736c033c5f2ad07bb1276f9696f0312ea6ff0a28cd0ab3e3d", size = 1448403, upload-time = "2025-10-10T13:54:21.211Z" }, + { url = "https://files.pythonhosted.org/packages/a2/20/dc990edb7b075ecdba4e02bcd03d1583faeb84f664d1585c4c00a0f9851a/mpi4py-4.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c4ef9fe5fb211b1c5b6afe521397e3feb01e104024d6bc37aa4289c370605e2", size = 1318018, upload-time = "2025-10-10T13:54:23.23Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bf/b0ab43a99ac2a1d6d5765cb7d2a4f093656090ce07528043057ecc3e87cb/mpi4py-4.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:e13a1ba26604514a12c95b7d76058ce800d5740d5f5f3b50c4b782cfa0dfaa1f", size = 1722939, upload-time = "2025-10-10T13:54:24.862Z" }, + { url = "https://files.pythonhosted.org/packages/84/26/3e00dc536311e758096414b4f33beb4c7f04dff875e87a6e88fbbe4fc2d8/mpi4py-4.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:28ce1f7412f5e99a6b9fe2547203633431d0ee45670413a475a07e6c785e63b1", size = 1798116, upload-time = "2025-10-10T13:54:26.378Z" }, + { url = "https://files.pythonhosted.org/packages/15/51/d06d2b126be5660aca8c00fe0d940a8658085038f61a9cfc834d3d5ffa80/mpi4py-4.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd1e49b84a0651018517e87daf68085719eca25e5c9a7cd05d98a73418c88836", size = 1586285, upload-time = "2025-10-10T13:54:27.838Z" }, + { url = "https://files.pythonhosted.org/packages/51/63/eeb936e0e8cfd8160b6b297645c730b22d242595861cf6a2fa627a358175/mpi4py-4.1.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dd869ea7758b591ffbb1483588a6fbf84952a5090e80a45ea89674d55cf25f3b", size = 1514102, upload-time = "2025-10-10T13:54:29.297Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c1/06967d4c107ea7169d2120c4fb86c404707e6de82e277dc9f0fa5a9c1bf1/mpi4py-4.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:475da0797442cba723c0ad37da6a1c51d9624e697dd8bf89f23d0fad81e73eda", size = 1395247, upload-time = "2025-10-10T13:54:30.881Z" }, + { url = "https://files.pythonhosted.org/packages/9e/7c/5f0f32b39185f0a7074c165dc37cdd235bfd737928a2fe223e41b308fb4c/mpi4py-4.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8d3bfa074776d9507ee957f5230d11ecd03da23f601a85349a1a333eaf55e5fa", size = 1771515, upload-time = "2025-10-10T13:54:32.395Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e8/93ddde2b6ee7631b46bb79b851630b3527d9060b9b999844bcd882977539/mpi4py-4.1.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:1deb6f9df28ec6972305287cb2035c20d3f5af59f687f962080756374c16e48f", size = 1713353, upload-time = "2025-10-10T13:54:33.934Z" }, + { url = "https://files.pythonhosted.org/packages/b2/23/449562bd23fcfbd7d01006b39429972bfed5dfb8541355d06d2e17c16c27/mpi4py-4.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1bb1e3ad0b9047b0dbc7b4014160a7ab2a84f1627be665527c7445fc312f189b", size = 1496415, upload-time = "2025-10-10T13:54:35.927Z" }, + { url = "https://files.pythonhosted.org/packages/51/33/9a5b9ae66cbb095b711f4ddae6d2d4b0f55202ac9e503fd588b101f04a22/mpi4py-4.1.1-cp314-cp314-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5f757e3089abf2c9db69fac1665fa99c52ed392fdf799159f25cba9ee3b64f5a", size = 1450750, upload-time = "2025-10-10T13:54:37.608Z" }, + { url = "https://files.pythonhosted.org/packages/d2/88/6acf948f19cb59c0e8843fed4ab4c471b7644e8a16c2d5d9c7ab6d73d573/mpi4py-4.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:807c6f1ed3adbc12952db52127e34cfbd6c48a05c3b3dd59deee2d2f09d78888", size = 1325773, upload-time = "2025-10-10T13:54:39.136Z" }, + { url = "https://files.pythonhosted.org/packages/6a/b4/3021e073772cd9e1062a810b7298e68ea40933fb91b1c1c0d07c968dce5c/mpi4py-4.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:2c85983d38d77e6302a242e32afd2a9a9b3adedd770e199a38e5b8957150e7ac", size = 1721603, upload-time = "2025-10-10T13:54:41.396Z" }, + { url = "https://files.pythonhosted.org/packages/ed/02/b6700c24fe28588a4e40adb23d02fe2aea82b33495fd6290235da5199383/mpi4py-4.1.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:729c4f625ad60e5cfb6c260608d249dc35a33cc16605faff01c6adbbd7e8ce0f", size = 1799551, upload-time = "2025-10-10T13:54:43.084Z" }, + { url = "https://files.pythonhosted.org/packages/5a/93/9c9870174183869bd5a50bbfe7bda91a52bf7ca2d0851de4009590e735a2/mpi4py-4.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3cca235d46009f54cb319c779c6ac53d41ce1eee3cf07f157995bc7739329b97", size = 1587583, upload-time = "2025-10-10T13:54:45.989Z" }, + { url = "https://files.pythonhosted.org/packages/29/12/c46bec2311fc937ed3767312f9feb5f11bc70058c20bc53ae7369d759424/mpi4py-4.1.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2580fab891db492f32a6e02717e824f6fd5588be6560b08627c1e9322f7ccbfb", size = 1513437, upload-time = "2025-10-10T13:54:48.145Z" }, + { url = "https://files.pythonhosted.org/packages/09/3e/e46629867204b22ce6804096e0b7d35bb5b473df1d12272021843af726c3/mpi4py-4.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6beec4841f9436d49ec9cabfd76a19df61c10b21ca14eddafa58fe7977802ee7", size = 1395082, upload-time = "2025-10-10T13:54:49.744Z" }, + { url = "https://files.pythonhosted.org/packages/1a/ca/7e27edf78cd8ba68aacafc836004cd092a978f0d5ffc8a3eac9e904a3e0e/mpi4py-4.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:b4b3813da9a7a1fc37ffb8dad314cb396313a40cd3fe150854ab29e999a9eb8c", size = 1771707, upload-time = "2025-10-10T13:54:51.756Z" }, + { url = "https://files.pythonhosted.org/packages/e9/63/b6a2863fb7dd5a9eccfdb055bf1124b999ff755d0187223b307161479b76/mpi4py-4.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:95bb98d946eb88c9ae4dc6c42d11b3af8ce6b91e644c288cc3f85ec7596ffcd3", size = 1480110, upload-time = "2025-10-10T13:55:11.381Z" }, + { url = "https://files.pythonhosted.org/packages/de/18/358f0eb58fb3b79f65861ed682af9e735d86669663dfbce396e8673ed518/mpi4py-4.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:84e9eb2e609b0b94cd0e9a3e3b57d897f748fb0207c4f72e81e5a95aba033767", size = 1340704, upload-time = "2025-10-10T13:55:12.973Z" }, + { url = "https://files.pythonhosted.org/packages/b9/66/b342e330ac543d0147ebfab754f69854c4777ac9785cb5b7610e3cd0c29a/mpi4py-4.1.1-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:027b1a1ff9d57afed10af6b79041b95f85fd11b2af74e4c34ef4866ce81ecc24", size = 1380452, upload-time = "2025-10-10T13:55:14.582Z" }, + { url = "https://files.pythonhosted.org/packages/dd/61/bbf87de6f3a8a9c54e7a4b72878c9069646ca9cafac8217fa5493a54b068/mpi4py-4.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c1191856906967a48fdcc484b326c179747e68c186261d76480a75156bcc73bf", size = 1255980, upload-time = "2025-10-10T13:55:17.075Z" }, + { url = "https://files.pythonhosted.org/packages/8d/4b/227091dec11518e5545bd1ec91f52e06f64bdae697adc5fb33f9f20c04dc/mpi4py-4.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:189d49b0ae963f8f6f5dd8ed0f5f37923285c97bc725476990ec0556972bb4b2", size = 1452641, upload-time = "2025-10-10T13:55:18.562Z" }, ] [[package]] From 3a37c9c0dc093b14bf084aeeabb4238491e590cd Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 14:29:13 +0100 Subject: [PATCH 65/68] Remove explicitl GT4PY_BUILD_JOBS from distributed pipeline --- ci/distributed.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index d3ecfef697..b62447e15d 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -62,7 +62,6 @@ build_distributed: ICON4PY_TEST_DATA_PATH: "/icon4py/testdata" ICON4PY_ENABLE_GRID_DOWNLOAD: false ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false - GT4PY_BUILD_JOBS: 32 GT4PY_BUILD_CACHE_LIFETIME: "persistent" PYTEST_ADDOPTS: "--durations=0" CSCS_ADDITIONAL_MOUNTS: '["/capstor/store/cscs/userlab/cwci02/icon4py/ci/testdata:$ICON4PY_TEST_DATA_PATH"]' From 99811d07b67a359adff52fdc77514e2e6c728ad4 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 15:31:21 +0100 Subject: [PATCH 66/68] Decrease distributed gpu timelimit --- ci/distributed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index bce42b243d..f5e726ea99 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -107,7 +107,7 @@ build_distributed: - if: $COMPONENT == 'common' && ($BACKEND == 'dace_gpu' || $BACKEND == 'gtfn_gpu') variables: # TODO(msimberg): Decrease this when enabling dace_gpu above, if possible. - SLURM_TIMELIMIT: '03:00:00' + SLURM_TIMELIMIT: '01:30:00' - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' From d6dcc6c44cf56206ccda6daa4876b1e2f4c8555a Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 15:57:45 +0100 Subject: [PATCH 67/68] Use normal partition for long distributed CI jobs --- ci/distributed.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/distributed.yml b/ci/distributed.yml index f5e726ea99..28faf710d8 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -108,6 +108,9 @@ build_distributed: variables: # TODO(msimberg): Decrease this when enabling dace_gpu above, if possible. SLURM_TIMELIMIT: '01:30:00' + # TODO(msimberg): Use shared partition when time limit can be set to at + # most an hour. The shared partition only accepts jobs maximum an hour long. + SLURM_PARTITION: "normal" - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' From fabff2f9bfe673215ac793437f5627ba414215aa Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 15:59:04 +0100 Subject: [PATCH 68/68] Remove gpus per task entry from distributed ci configuration --- ci/distributed.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/distributed.yml b/ci/distributed.yml index 28faf710d8..50c7f85444 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -61,7 +61,6 @@ build_distributed: SLURM_PARTITION: "shared" SLURM_CPU_BIND: 'verbose' SLURM_NTASKS: 4 - SLURM_GPUS_PER_TASK: 1 ICON4PY_TEST_DATA_PATH: "/icon4py/testdata" ICON4PY_ENABLE_GRID_DOWNLOAD: false ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false