Skip to content

Enable dpnp build on AMD GPU #2302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 51 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,31 +71,61 @@ option(DPNP_TARGET_CUDA
"Build DPNP to target CUDA devices"
OFF
)
option(DPNP_TARGET_HIP
"Build DPNP to target HIP devices"
OFF
)
option(DPNP_USE_ONEMKL_INTERFACES
"Build DPNP with oneMKL Interfaces"
OFF
)
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume there is no support for multiple values:

Suggested change
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
set(HIP_TARGET "" CACHE STRING "HIP architecture for target")

Copy link
Collaborator

@ndgrigorian ndgrigorian Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point, it was clear in docs that only one architecture was supported at a time, but now it isn't as clear and should be tested

Also, there is new information in the extension guide

The compiler driver also offers alias targets for each target+architecture pair to make the command line shorter and easier to understand for humans. Thanks to the aliases, the -Xsycl-target-backend flags no longer need to be specified.

It shows that the command

icpx -fsycl -fsycl-targets=spir64_gen,amdgcn-amd-amdhsa,nvptx64-nvidia-cuda \
        -Xsycl-target-backend=spir64_gen '-device pvc' \
        -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx1030 \
        -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_80 \
        -o sycl-app sycl-app.cpp

is equivalent to

icpx -fsycl -fsycl-targets=intel_gpu_pvc,amd_gpu_gfx1030,nvidia_gpu_sm_80 \
        -o sycl-app sycl-app.cpp

so maybe both dpctl and dpnp can simplify by removing the need for -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=[X] completely

list of aliases:
https://intel.github.io/llvm/UsersManual.html

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aliases list seems to claim only one alias is supported at a time. So probably only one architecture at once is possible? That would be my guess


set(_dpnp_sycl_targets)
set(_dpnp_amd_targets)
set(_use_onemkl_interfaces OFF)
set(_use_onemkl_interfaces_cuda OFF)
set(_use_onemkl_interfaces_hip OFF)

set(_dpnp_sycl_target_compile_options)
set(_dpnp_sycl_target_link_options)

if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
if(DPNP_TARGET_CUDA)
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
else()
if(DEFINED ENV{DPNP_TARGET_CUDA})
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
endif()
endif()
if(DPNP_TARGET_CUDA)
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
endif()
if(DPNP_TARGET_HIP)
if (NOT "x${HIP_TARGETS}" STREQUAL "x")
set(_dpnp_amd_targets ${HIP_TARGETS})
set(_use_onemkl_interfaces_hip ON)
if(_dpnp_sycl_targets)
set(_dpnp_sycl_targets "amdgcn-amd-amdhsa,${_dpnp_sycl_targets}")
else()
set(_dpnp_sycl_targets "amdgcn-amd-amdhsa,spir64-unknown-unknown")
endif()
else()
message(FATAL_ERROR "HIP_TARGETS must be specified when using HIP backend")
endif()
endif()
else()
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
if (NOT "x${HIP_TARGETS}" STREQUAL "x")
set(_dpnp_amd_targets ${HIP_TARGETS})
set(_use_onemkl_interfaces_hip ON)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need here something similar to above?

        set(_dpnp_sycl_targets "amdgcn-amd-amdhsa,${_dpnp_sycl_targets}")

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if we set DPNP_SYCL_TARGETS via --cmake_opts we expect them to be the right target e.g. amdgcn-amd-amdhsa or nvptx64-nvidia-cuda

endif()
endif()

if(_dpnp_sycl_targets)
if (_dpnp_sycl_targets)
message(STATUS "Compiling for -fsycl-targets=${_dpnp_sycl_targets}")
list(APPEND _dpnp_sycl_target_compile_options -fsycl-targets=${_dpnp_sycl_targets})
list(APPEND _dpnp_sycl_target_link_options -fsycl-targets=${_dpnp_sycl_targets})
if(_dpnp_amd_targets)
list(APPEND _dpnp_sycl_target_compile_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpnp_amd_targets})
list(APPEND _dpnp_sycl_target_link_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpnp_amd_targets})
endif()
endif()

set(_use_onemkl_interfaces OFF)
if(DPNP_USE_ONEMKL_INTERFACES)
set(_use_onemkl_interfaces ON)
else()
Expand All @@ -107,13 +137,20 @@ endif()
if(_use_onemkl_interfaces)
set(BUILD_FUNCTIONAL_TESTS False)
set(BUILD_EXAMPLES False)
set(ENABLE_MKLGPU_BACKEND True)
set(ENABLE_MKLCPU_BACKEND True)

if(_use_onemkl_interfaces_cuda)
set(ENABLE_CUBLAS_BACKEND True)
set(ENABLE_CUSOLVER_BACKEND True)
set(ENABLE_CUFFT_BACKEND True)
# set(ENABLE_CURAND_BACKEND True)
set(ENABLE_MKLGPU_BACKEND True)
set(ENABLE_MKLCPU_BACKEND True)
endif()
if(_use_onemkl_interfaces_hip)
set(ENABLE_ROCBLAS_BACKEND True)
set(ENABLE_ROCSOLVER_BACKEND True)
set(ENABLE_ROCFFT_BACKEND True)
# set(ENABLE_ROCRAND_BACKEND True)
endif()

if(DPNP_ONEMKL_INTERFACES_DIR)
Expand Down
4 changes: 2 additions & 2 deletions dpnp/backend/extensions/indexing/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_link_options}
)
endif()

Expand Down
4 changes: 2 additions & 2 deletions dpnp/backend/extensions/statistics/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_link_options}
)
endif()

Expand Down
4 changes: 2 additions & 2 deletions dpnp/backend/extensions/ufunc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_link_options}
)
endif()

Expand Down
18 changes: 18 additions & 0 deletions scripts/build_locally.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def run(
verbose=False,
cmake_opts="",
target="intel",
arch=None,
onemkl_interfaces=False,
onemkl_interfaces_dir=None,
):
Expand Down Expand Up @@ -104,6 +105,16 @@ def run(
# Always builds using oneMKL interfaces for the cuda target
onemkl_interfaces = True

if target == "hip":
if not arch:
raise ValueError("--arch is required when --target=hip")
cmake_args += [
"-DDPNP_TARGET_HIP=ON",
Copy link
Contributor

@antonwolfy antonwolfy Feb 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For what do we need to define two variables? Can it be combined in a single one, like in dpctl: -DDPNP_TARGET_HIP={arch}?

Copy link
Collaborator

@ndgrigorian ndgrigorian Feb 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, --target=cuda is current dpnp approach, but:

  1. dpctl and dpnp should consider supporting targeting specific CUDA architectures
  2. --target=hip means that there is no way to build simultaneously for HIP and CUDA (which is very, very much an edge case, but should be considered)

For these reasons, I think it is most sensible to move away from --target= universal approach to --target-cuda= and --target-hip= or something to that effect

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ndgrigorian it is a great suggestion.
I have added support for --target-hip and I am going to add --target-cuda instead of --target in the next PR.
Thanks

f"-DHIP_TARGETS={arch}",
]
# Always builds using oneMKL interfaces for the hip target
onemkl_interfaces = True

if onemkl_interfaces:
cmake_args += [
"-DDPNP_USE_ONEMKL_INTERFACES=ON",
Expand Down Expand Up @@ -177,6 +188,12 @@ def run(
default="intel",
type=str,
)
driver.add_argument(
"--arch",
help="Architecture for HIP target",
dest="arch",
type=str,
)
driver.add_argument(
"--onemkl-interfaces",
help="Build using oneMKL Interfaces",
Expand Down Expand Up @@ -244,6 +261,7 @@ def run(
verbose=args.verbose,
cmake_opts=args.cmake_opts,
target=args.target,
arch=args.arch,
onemkl_interfaces=args.onemkl_interfaces,
onemkl_interfaces_dir=args.onemkl_interfaces_dir,
)
Loading