Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5634694
Skip scalar to symbol promotion
FlorianDeconinck Jun 18, 2025
1bacb49
Merge remote-tracking branch 'origin/develop' into feature/oir_stree_…
romanc Jun 19, 2025
3faa58f
Update gt4py and dace submodules
romanc Jun 19, 2025
d94925d
Run stencil factory tests not only against numpy, but also against
romanc Jun 19, 2025
b7772b5
Update gt4py (burning the old bridge)
romanc Jun 19, 2025
092ef1a
Remove splittable region optimization
romanc Jun 20, 2025
9241781
Update gt4py (Last pass on ADR for now)
romanc Jun 25, 2025
f4c5884
Update gt4py branch (Minor cleanup refactors)
romanc Jun 27, 2025
a1b28c2
Merge remote-tracking branch 'origin/develop' into feature/oir_stree_…
romanc Jun 27, 2025
843891e
Validate at `orchestration` entry
FlorianDeconinck Jul 2, 2025
f508ef2
Merge remote-tracking branch 'NOAA/feature/oir_stree_sdfg_bridge' int…
FlorianDeconinck Jul 2, 2025
b2a5b8f
Expose compiler optimization level to `dace` orchestration via `config`
FlorianDeconinck Jul 2, 2025
466fcb2
Update to GT4Py for transient flag
FlorianDeconinck Jul 3, 2025
376d6f6
Merge remote-tracking branch 'origin/develop' into feature/oir_stree_…
romanc Jul 7, 2025
713616d
Fixup: don't declare backends twice in test_stencil_factory
romanc Jul 7, 2025
623235f
Update dace & gt4py
romanc Jul 8, 2025
1ed6f4f
Update gt4py and dace submodules
romanc Jul 10, 2025
b9b6701
Merge remote-tracking branch 'origin/develop' into feature/oir_stree_…
romanc Jul 15, 2025
c478dfb
Update dace & gt4py to latest dev commits
romanc Jul 15, 2025
2003fbb
Update gt4py and dace (DDE fixes)
romanc Jul 16, 2025
7ed9cb7
Update gt4py (move dace branch to GridTools/dace)
romanc Jul 16, 2025
327350c
Merge remote-tracking branch 'origin/develop' into feature/oir_stree_…
romanc Jul 18, 2025
0c1d4c3
Import style: avoid importing everything
romanc Jul 23, 2025
1fe7f0b
Merge remote-tracking branch 'origin/develop' into feature/oir_stree_…
romanc Jul 24, 2025
a14b0fe
Update gt4py/dace: preliminary NView support
romanc Jul 24, 2025
05c4ffd
Update gt4py (oir -> stree bridge from mainline)
romanc Jul 28, 2025
ec92998
Merge remote-tracking branch 'origin/develop' into feature/oir_stree_…
romanc Jul 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion external/gt4py
Submodule gt4py updated 318 files
9 changes: 8 additions & 1 deletion ndsl/dsl/dace/dace_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import dace.config
from dace.codegen.compiled_sdfg import CompiledSDFG
from dace.frontend.python.parser import DaceProgram
from gt4py.cartesian.config import GT4PY_COMPILE_OPT_LEVEL

from ndsl.comm.communicator import Communicator
from ndsl.comm.partitioner import Partitioner
Expand Down Expand Up @@ -181,6 +182,12 @@ def __init__(
# We control this Dace configuration below with our own override
dace_debug_env_var = os.getenv("PACE_DACE_DEBUG", "False") == "True"

# We hijack the optimization level of GT4Py because we don't
# have the configuration at NDSL level, but we do use the GT4Py
# level
# TODO: if GT4PY opt level is funneled via NDSL - use it here
optimization_level = GT4PY_COMPILE_OPT_LEVEL

# Set the configuration of DaCe to a rigid & tested set of divergence
# from the defaults when orchestrating
if orchestration != DaCeOrchestration.Python:
Expand All @@ -195,7 +202,7 @@ def __init__(
"compiler",
"cpu",
"args",
value="-std=c++14 -fPIC -Wall -Wextra -O3",
value=f"-std=c++14 -fPIC -Wall -Wextra -O{optimization_level}",
)
# Potentially buggy - deactivate
dace.config.Config.set(
Expand Down
65 changes: 33 additions & 32 deletions ndsl/dsl/dace/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
import os
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

import dace
import gt4py.storage
from dace import SDFG
from dace import compiletime as DaceCompiletime
from dace import dtypes
from dace import method as dace_method
from dace import nodes
from dace import program as dace_program
from dace.dtypes import DeviceType as DaceDeviceType
from dace.dtypes import StorageType as DaceStorageType
from dace.frontend.python.common import SDFGConvertible
from dace.frontend.python.parser import DaceProgram
from dace.transformation.auto.auto_optimize import make_transients_persistent
from dace.transformation.helpers import get_parent_map
from dace.transformation.passes.simplify import SimplifyPass
from gt4py import storage

from ndsl.comm.mpi import MPI
from ndsl.dsl.dace.build import get_sdfg_path, write_build_info
Expand All @@ -27,7 +31,6 @@
negative_qtracers_checker,
sdfg_nan_checker,
)
from ndsl.dsl.dace.sdfg_opt_passes import splittable_region_expansion
from ndsl.dsl.dace.utils import (
DaCeProgress,
memory_static_analysis,
Expand Down Expand Up @@ -61,18 +64,18 @@ def _download_results_from_dace(
return None

backend = config.get_backend()
return [gt4py.storage.from_array(result, backend=backend) for result in dace_result]
return [storage.from_array(result, backend=backend) for result in dace_result]


def _to_gpu(sdfg: dace.SDFG):
def _to_gpu(sdfg: SDFG):
"""Flag memory in SDFG to GPU.
Force deactivate OpenMP sections for sanity."""

# Gather all maps
allmaps = [
(me, state)
for me, state in sdfg.all_nodes_recursive()
if isinstance(me, dace.nodes.MapEntry)
if isinstance(me, nodes.MapEntry)
]
topmaps = [
(me, state) for me, state in allmaps if get_parent_map(state, me) is None
Expand All @@ -81,21 +84,21 @@ def _to_gpu(sdfg: dace.SDFG):
# Set storage of arrays to GPU, scalarizable arrays will be set on registers
for sd, _aname, arr in sdfg.arrays_recursive():
if arr.shape == (1,):
arr.storage = dace.StorageType.Register
arr.storage = dtypes.StorageType.Register
else:
arr.storage = dace.StorageType.GPU_Global
arr.storage = dtypes.StorageType.GPU_Global

# All maps will be schedule on GPU
for mapentry, _state in topmaps:
mapentry.schedule = dace.ScheduleType.GPU_Device
mapentry.schedule = dtypes.ScheduleType.GPU_Device

# Deactivate OpenMP sections
for sd in sdfg.all_sdfgs_recursive():
sd.openmp_sections = False


def _simplify(
sdfg: dace.SDFG,
sdfg: SDFG,
*,
validate: bool = True,
validate_all: bool = False,
Expand All @@ -108,24 +111,33 @@ def _simplify(
validate=validate,
validate_all=validate_all,
verbose=verbose,
skip=["ScalarToSymbolPromotion"],
).apply_pass(sdfg, {})


def _build_sdfg(
dace_program: DaceProgram, sdfg: dace.SDFG, config: DaceConfig, args, kwargs
dace_program: DaceProgram, sdfg: SDFG, config: DaceConfig, args, kwargs
):
"""Build the .so out of the SDFG on the top tile ranks only"""
is_compiling = True if DEACTIVATE_DISTRIBUTED_DACE_COMPILE else config.do_compile

if is_compiling:
with DaCeProgress(config, "Validate original SDFG"):
sdfg.validate()

# Make the transient arrays persistent
if config.is_gpu_backend():
# TODO
# The following should happen on the stree level
_to_gpu(sdfg)

make_transients_persistent(sdfg=sdfg, device=DaceDeviceType.GPU)

# Upload args to device
_upload_to_device(list(args) + list(kwargs.values()))
else:
# TODO
# The following should happen on the stree level
for _sd, _aname, arr in sdfg.arrays_recursive():
if arr.shape == (1,):
arr.storage = DaceStorageType.Register
Expand All @@ -141,29 +153,18 @@ def _build_sdfg(
if k in sdfg_kwargs and tup[1].transient:
del sdfg_kwargs[k]

with DaCeProgress(config, "Simplify (1/2)"):
_simplify(sdfg, validate=False, verbose=True)

# Perform pre-expansion fine tuning
with DaCeProgress(config, "Split regions"):
splittable_region_expansion(sdfg, verbose=True)

# Expand the stencil computation Library Nodes with the right expansion
with DaCeProgress(config, "Expand"):
sdfg.expand_library_nodes()

with DaCeProgress(config, "Simplify (2/2)"):
with DaCeProgress(config, "Simplify"):
_simplify(sdfg, validate=False, verbose=True)

# Move all memory that can be into a pool to lower memory pressure.
# Change Persistent memory (sub-SDFG) into Scope and flag it.
with DaCeProgress(config, "Turn Persistents into pooled Scope"):
memory_pooled = 0.0
for _sd, _aname, arr in sdfg.arrays_recursive():
if arr.lifetime == dace.AllocationLifetime.Persistent:
if arr.lifetime == dtypes.AllocationLifetime.Persistent:
arr.pool = True
memory_pooled += arr.total_size * arr.dtype.bytes
arr.lifetime = dace.AllocationLifetime.Scope
arr.lifetime = dtypes.AllocationLifetime.Scope
memory_pooled = float(memory_pooled) / (1024 * 1024)
ndsl_log.debug(
f"{DaCeProgress.default_prefix(config)} Pooled {memory_pooled} mb",
Expand All @@ -180,7 +181,9 @@ def _build_sdfg(
# Compile
with DaCeProgress(config, "Codegen & compile"):
sdfg.compile()
write_build_info(sdfg, config.layout, config.tile_resolution, config._backend)
write_build_info(
sdfg, config.layout, config.tile_resolution, config.get_backend()
)

# Printing analysis of the compiled SDFG
with DaCeProgress(config, "Build finished. Running memory static analysis"):
Expand Down Expand Up @@ -223,9 +226,7 @@ def _build_sdfg(
return _call_sdfg(dace_program, sdfg, config, args, kwargs)


def _call_sdfg(
dace_program: DaceProgram, sdfg: dace.SDFG, config: DaceConfig, args, kwargs
):
def _call_sdfg(dace_program: DaceProgram, sdfg: SDFG, config: DaceConfig, args, kwargs):
"""Dispatch the SDFG execution and/or build"""
# Pre-compiled SDFG code path does away with any data checks and
# caches the marshalling - leading to an almost direct C call
Expand Down Expand Up @@ -259,7 +260,7 @@ def _parse_sdfg(
config: DaceConfig,
*args,
**kwargs,
) -> Optional[dace.SDFG]:
) -> Optional[SDFG]:
"""Return an SDFG depending on cache existence.
Either parses, load a .sdfg or load .so (as a compiled sdfg)

Expand Down Expand Up @@ -318,7 +319,7 @@ class _LazyComputepathFunction(SDFGConvertible):
def __init__(self, func: Callable, config: DaceConfig):
self.func = func
self.config = config
self.daceprog: DaceProgram = dace.program(self.func)
self.daceprog: DaceProgram = dace_program(self.func)
self._sdfg = None

def __call__(self, *args, **kwargs):
Expand Down Expand Up @@ -373,7 +374,7 @@ class _LazyComputepathMethod:

class SDFGEnabledCallable(SDFGConvertible):
def __init__(self, lazy_method: _LazyComputepathMethod, obj_to_bind):
methodwrapper = dace.method(lazy_method.func)
methodwrapper = dace_method(lazy_method.func)
self.obj_to_bind = obj_to_bind
self.lazy_method = lazy_method
self.daceprog: DaceProgram = methodwrapper.__get__(obj_to_bind)
Expand Down
24 changes: 0 additions & 24 deletions ndsl/dsl/dace/sdfg_opt_passes.py

This file was deleted.