Commit 25d3b34

update MultiFab.py to also support SYCL backend
Signed-off-by: Roelof Groenewald <[email protected]>
1 parent: ccce85c

2 files changed: +68 -14 lines changed

src/amrex/extensions/MultiFab.py

Lines changed: 67 additions & 13 deletions
@@ -103,10 +103,50 @@ def mf_to_cupy(self, copy=False, order="F"):
     return views


+def mf_to_dpnp(self, copy=False, order="F"):
+    """
+    Provide a dpnp view into a MultiFab.
+
+    This includes ngrow guard cells of each box.
+
+    Note on the order of indices:
+    By default, this is as in AMReX in Fortran contiguous order, indexing as
+    x,y,z. This has performance implications for use in external libraries such
+    as dpnp.
+    The order="C" option will index as z,y,x and may perform better.
+    https://github.com/AMReX-Codes/pyamrex/issues/55#issuecomment-1579610074
+
+    Parameters
+    ----------
+    self : amrex.MultiFab
+        A MultiFab class in pyAMReX
+    copy : bool, optional
+        Copy the data if true, otherwise create a view (default).
+    order : string, optional
+        F order (default) or C. C is faster with external libraries.
+
+    Returns
+    -------
+    list of dpnp.array
+        A list of dpnp n-dimensional arrays, for each local block in the
+        MultiFab.
+
+    Raises
+    ------
+    ImportError
+        Raises an exception if dpnp is not installed
+    """
+    views = []
+    for mfi in self:
+        views.append(self.array(mfi).to_dpnp(copy, order))
+
+    return views
+
+
 def mf_to_xp(self, copy=False, order="F"):
     """
-    Provide a NumPy or CuPy view into a MultiFab,
-    depending on amr.Config.have_gpu .
+    Provide a NumPy, CuPy or dpnp view into a MultiFab,
+    depending on amr.Config.have_gpu and amr.Config.gpu_backend.

     This function is similar to CuPy's xp naming suggestion for CPU/GPU agnostic code:
     https://docs.cupy.dev/en/stable/user_guide/basic.html#how-to-write-cpu-gpu-agnostic-code
@@ -132,15 +172,21 @@ def mf_to_xp(self, copy=False, order="F"):
     Returns
     -------
     list of xp.array
-        A list of NumPy or CuPy n-dimensional arrays, for each local block in the
-        MultiFab.
+        A list of NumPy, CuPy or dpnp n-dimensional arrays, for each local block
+        in the MultiFab.
     """
     import inspect

     amr = inspect.getmodule(self)
-    return (
-        self.to_cupy(copy, order) if amr.Config.have_gpu else self.to_numpy(copy, order)
-    )
+
+    if amr.Config.have_gpu:
+        if amr.Config.gpu_backend == "SYCL":
+            return self.to_dpnp(copy, order)
+        else:  # if not SYCL, use cupy
+            return self.to_cupy(copy, order)
+
+    # if no GPU, use NumPy
+    return self.to_numpy(copy, order)


 def copy_multifab(amr, self):
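
Aside (not part of the diff): with this change, user code written against to_xp() becomes backend-agnostic across CPU, CUDA/HIP, and SYCL devices. A minimal usage sketch, under the assumption of a 3D pyAMReX build; the box sizes and component counts are arbitrary illustration values:

import amrex.space3d as amr

amr.initialize([])
domain = amr.Box(amr.IntVect(0, 0, 0), amr.IntVect(31, 31, 31))
ba = amr.BoxArray(domain)
dm = amr.DistributionMapping(ba)
mf = amr.MultiFab(ba, dm, 1, 0)  # 1 component, 0 ghost cells

views = mf.to_xp(order="C")  # NumPy on CPU, dpnp on SYCL GPUs, CuPy otherwise
for arr in views:
    arr[...] = 0.0  # same array API whichever backend was selected

amr.finalize()
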
@@ -490,6 +536,9 @@ def __getitem__(self, index, with_internal_ghosts=False):
         Whether to include internal ghost cells. When true, data from ghost cells may be used that
         overlaps valid cells.
     """
+    import inspect
+    amr = inspect.getmodule(self)
+
     index4 = _process_index(self, index)

     # Gather the data to be included in a list to be sent to other processes
@@ -503,17 +552,18 @@ def __getitem__(self, index, with_internal_ghosts=False):
             device_arr = _get_field(self, mfi)
             slice_arr = device_arr[block_slices]
             try:
-                # Copy data from host to device using cupy syntax
-                slice_arr = slice_arr.get()
+                if amr.Config.gpu_backend == "SYCL":
+                    import dpnp
+                    slice_arr = dpnp.asnumpy(slice_arr)
+                else:
+                    # Copy data from device to host using cupy syntax
+                    slice_arr = slice_arr.get()
             except AttributeError:
                 # Array is already a numpy array on the host
                 pass
             datalist.append((global_slices, slice_arr))

     # Gather the data from all processors
-    import inspect
-
-    amr = inspect.getmodule(self)
     if amr.Config.have_mpi:
         npes = amr.ParallelDescriptor.NProcs()
     else:
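
Aside (not part of the diff): the try/except above relies on duck typing, since only device arrays carry a .get() method. A standalone sketch of the same device-to-host pattern, assuming dpnp or cupy is installed; to_host is a hypothetical helper name, not part of this commit:

def to_host(arr, gpu_backend):
    """Copy a device array to the host; pass NumPy arrays through."""
    try:
        if gpu_backend == "SYCL":
            import dpnp
            return dpnp.asnumpy(arr)  # dpnp device -> host copy
        return arr.get()  # cupy device -> host copy
    except AttributeError:
        return arr  # already a NumPy array on the host
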
@@ -604,7 +654,10 @@ def __setitem__(self, index, value):

     amr = inspect.getmodule(self)
     if amr.Config.have_gpu:
-        import cupy as xp
+        if amr.Config.gpu_backend == "SYCL":
+            import dpnp as xp
+        else:
+            import cupy as xp
     else:
         xp = np

@@ -654,6 +707,7 @@ def register_MultiFab_extension(amr):

     amr.MultiFab.to_numpy = mf_to_numpy
     amr.MultiFab.to_cupy = mf_to_cupy
+    amr.MultiFab.to_dpnp = mf_to_dpnp
     amr.MultiFab.to_xp = mf_to_xp

     amr.MultiFab.copy = lambda self: copy_multifab(amr, self)
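
Aside (not part of the diff): once register_MultiFab_extension has run, to_dpnp is available on every MultiFab, mirroring to_numpy and to_cupy. A brief sketch, assuming an existing MultiFab mf on a SYCL-capable device:

views = mf.to_dpnp(copy=False, order="C")  # dpnp views of each local block, indexed z,y,x
copies = mf.to_dpnp(copy=True)             # independent copies, Fortran order
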

src/dlpack.h

Lines changed: 1 addition & 1 deletion
@@ -197,7 +197,7 @@ typedef struct {
  * types. This pointer is always aligned to 256 bytes as in CUDA. The
  * `byte_offset` field should be used to point to the beginning of the data.
  *
- * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
+ * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
  * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
  * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
  * (after which this note will be updated); at the moment it is recommended
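
Aside (not part of the diff): the 256-byte alignment this comment describes can be probed from Python through the array interface. A minimal sketch using NumPy:

import numpy as np

arr = np.zeros(1024)
ptr = arr.__array_interface__["data"][0]  # raw data pointer as an integer
# DLPack expects `data` to be 256-byte aligned, with `byte_offset` locating
# the first element; per the note above, many producers instead pass an
# unaligned pointer with byte_offset=0.
print("256-byte aligned:", ptr % 256 == 0)
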
