Commit 25d3b34

update MultiFab.py to also support SYCL backend
Signed-off-by: Roelof Groenewald <[email protected]>
1 parent: ccce85c

2 files changed: +68 -14 lines changed

src/amrex/extensions/MultiFab.py

Lines changed: 67 additions & 13 deletions
@@ -103,10 +103,50 @@ def mf_to_cupy(self, copy=False, order="F"):
     return views


+def mf_to_dpnp(self, copy=False, order="F"):
+    """
+    Provide a dpnp view into a MultiFab.
+
+    This includes ngrow guard cells of each box.
+
+    Note on the order of indices:
+    By default, this is as in AMReX in Fortran contiguous order, indexing as
+    x,y,z. This has performance implications for use in external libraries such
+    as dpnp.
+    The order="C" option will index as z,y,x and may perform better.
+    https://github.com/AMReX-Codes/pyamrex/issues/55#issuecomment-1579610074
+
+    Parameters
+    ----------
+    self : amrex.MultiFab
+        A MultiFab class in pyAMReX
+    copy : bool, optional
+        Copy the data if true, otherwise create a view (default).
+    order : string, optional
+        F order (default) or C. C is faster with external libraries.
+
+    Returns
+    -------
+    list of dpnp.array
+        A list of dpnp n-dimensional arrays, for each local block in the
+        MultiFab.
+
+    Raises
+    ------
+    ImportError
+        Raises an exception if dpnp is not installed
+    """
+    views = []
+    for mfi in self:
+        views.append(self.array(mfi).to_dpnp(copy, order))
+
+    return views
+
+
 def mf_to_xp(self, copy=False, order="F"):
     """
-    Provide a NumPy or CuPy view into a MultiFab,
-    depending on amr.Config.have_gpu .
+    Provide a NumPy, CuPy or dpnp view into a MultiFab,
+    depending on amr.Config.have_gpu and amr.Config.gpu_backend.

     This function is similar to CuPy's xp naming suggestion for CPU/GPU agnostic code:
     https://docs.cupy.dev/en/stable/user_guide/basic.html#how-to-write-cpu-gpu-agnostic-code
@@ -132,15 +172,21 @@ def mf_to_xp(self, copy=False, order="F"):
     Returns
     -------
     list of xp.array
-        A list of NumPy or CuPy n-dimensional arrays, for each local block in the
-        MultiFab.
+        A list of NumPy, CuPy or dpnp n-dimensional arrays, for each local block
+        in the MultiFab.
     """
     import inspect

     amr = inspect.getmodule(self)
-    return (
-        self.to_cupy(copy, order) if amr.Config.have_gpu else self.to_numpy(copy, order)
-    )
+
+    if amr.Config.have_gpu:
+        if amr.Config.gpu_backend == "SYCL":
+            return self.to_dpnp(copy, order)
+        else:  # if not SYCL, use cupy
+            return self.to_cupy(copy, order)
+
+    # if no GPU, use NumPy
+    return self.to_numpy(copy, order)


 def copy_multifab(amr, self):
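
Aside (not part of the diff): with this change, user code written against to_xp() becomes backend-agnostic across CPU, CUDA/HIP, and SYCL devices. A minimal usage sketch, under the assumption of a 3D pyAMReX build; the box sizes and component counts are arbitrary illustration values:

import amrex.space3d as amr

amr.initialize([])
domain = amr.Box(amr.IntVect(0, 0, 0), amr.IntVect(31, 31, 31))
ba = amr.BoxArray(domain)
dm = amr.DistributionMapping(ba)
mf = amr.MultiFab(ba, dm, 1, 0)  # 1 component, 0 ghost cells

views = mf.to_xp(order="C")  # NumPy on CPU, dpnp on SYCL GPUs, CuPy otherwise
for arr in views:
    arr[...] = 0.0  # same array API whichever backend was selected

amr.finalize()
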
@@ -490,6 +536,9 @@ def __getitem__(self, index, with_internal_ghosts=False):
         Whether to include internal ghost cells. When true, data from ghost cells may be used that
         overlaps valid cells.
     """
+    import inspect
+    amr = inspect.getmodule(self)
+
     index4 = _process_index(self, index)

     # Gather the data to be included in a list to be sent to other processes
@@ -503,17 +552,18 @@ def __getitem__(self, index, with_internal_ghosts=False):
             device_arr = _get_field(self, mfi)
             slice_arr = device_arr[block_slices]
             try:
-                # Copy data from host to device using cupy syntax
-                slice_arr = slice_arr.get()
+                if amr.Config.gpu_backend == "SYCL":
+                    import dpnp
+                    slice_arr = dpnp.asnumpy(slice_arr)
+                else:
+                    # Copy data from device to host using cupy syntax
+                    slice_arr = slice_arr.get()
             except AttributeError:
                 # Array is already a numpy array on the host
                 pass
             datalist.append((global_slices, slice_arr))

     # Gather the data from all processors
-    import inspect
-
-    amr = inspect.getmodule(self)
     if amr.Config.have_mpi:
         npes = amr.ParallelDescriptor.NProcs()
     else:
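
Aside (not part of the diff): the try/except above relies on duck typing, since only device arrays carry a .get() method. A standalone sketch of the same device-to-host pattern, assuming dpnp or cupy is installed; to_host is a hypothetical helper name, not part of this commit:

def to_host(arr, gpu_backend):
    """Copy a device array to the host; pass NumPy arrays through."""
    try:
        if gpu_backend == "SYCL":
            import dpnp
            return dpnp.asnumpy(arr)  # dpnp device -> host copy
        return arr.get()  # cupy device -> host copy
    except AttributeError:
        return arr  # already a NumPy array on the host
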
@@ -604,7 +654,10 @@ def __setitem__(self, index, value):

     amr = inspect.getmodule(self)
     if amr.Config.have_gpu:
-        import cupy as xp
+        if amr.Config.gpu_backend == "SYCL":
+            import dpnp as xp
+        else:
+            import cupy as xp
     else:
         xp = np

@@ -654,6 +707,7 @@ def register_MultiFab_extension(amr):

     amr.MultiFab.to_numpy = mf_to_numpy
     amr.MultiFab.to_cupy = mf_to_cupy
+    amr.MultiFab.to_dpnp = mf_to_dpnp
     amr.MultiFab.to_xp = mf_to_xp

     amr.MultiFab.copy = lambda self: copy_multifab(amr, self)
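
Aside (not part of the diff): once register_MultiFab_extension has run, to_dpnp is available on every MultiFab, mirroring to_numpy and to_cupy. A brief sketch, assuming an existing MultiFab mf on a SYCL-capable device:

views = mf.to_dpnp(copy=False, order="C")  # dpnp views of each local block, indexed z,y,x
copies = mf.to_dpnp(copy=True)             # independent copies, Fortran order
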

src/dlpack.h

Lines changed: 1 addition & 1 deletion
@@ -197,7 +197,7 @@ typedef struct {
  * types. This pointer is always aligned to 256 bytes as in CUDA. The
  * `byte_offset` field should be used to point to the beginning of the data.
  *
- * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
+ * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
  * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
  * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
  * (after which this note will be updated); at the moment it is recommended
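
Aside (not part of the diff): the 256-byte alignment this comment describes can be probed from Python through the array interface. A minimal sketch using NumPy:

import numpy as np

arr = np.zeros(1024)
ptr = arr.__array_interface__["data"][0]  # raw data pointer as an integer
# DLPack expects `data` to be 256-byte aligned, with `byte_offset` locating
# the first element; per the note above, many producers instead pass an
# unaligned pointer with byte_offset=0.
print("256-byte aligned:", ptr % 256 == 0)
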
