Add an opt-in DEBUG_XPU switch, read from the environment at configure time.

- CMakeLists.txt: Debug/RelWithDebInfo no longer force BUILD_SEPARATE_OPS.
- cmake/BuildFlags.cmake: SYCL kernel debug flags are applied only when
  DEBUG_XPU is set.
- src/BuildOnLinux.cmake, src/BuildOnWindows.cmake: with DEBUG_XPU, build one
  static library per SYCL source and link each one into torch_xpu_ops with
  whole-archive link options.

NOTE(review): the blob hashes on the "index" lines are carried over from the
original patch and predate the review fixes; regenerate with `git diff` if
they are needed (plain `git apply` / `patch` do not check them).

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 277f1f067..3a12c8119 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -76,9 +76,8 @@ endif()
 
 # Only for debugging. Save building time by shrinking translation unit scope.
 set(BUILD_SEPARATE_OPS $ENV{BUILD_SEPARATE_OPS})
-if(CMAKE_BUILD_TYPE MATCHES "(Debug|RelWithDebInfo)")
-  set(BUILD_SEPARATE_OPS TRUE)
-endif()
+# Opt-in switch: gates SYCL kernel debug flags and per-source static kernel libraries.
+set(DEBUG_XPU $ENV{DEBUG_XPU})
 set(BUILD_SPLIT_KERNEL_LIB $ENV{BUILD_SPLIT_KERNEL_LIB})
 
 add_subdirectory(${TORCH_XPU_OPS_ROOT}/src)
diff --git a/cmake/BuildFlags.cmake b/cmake/BuildFlags.cmake
index ec9aaccb2..01d9a0343 100644
--- a/cmake/BuildFlags.cmake
+++ b/cmake/BuildFlags.cmake
@@ -85,10 +85,12 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
     set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz)
   endif()
 
-  if(CMAKE_BUILD_TYPE MATCHES Debug)
-    set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
-  elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
-    set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
+  if(DEBUG_XPU)
+    if(CMAKE_BUILD_TYPE MATCHES Debug)
+      set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
+    elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
+      set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
+    endif()
   endif()
 
   set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})
diff --git a/src/BuildOnLinux.cmake b/src/BuildOnLinux.cmake
index aee7118f0..61ae8209d 100644
--- a/src/BuildOnLinux.cmake
+++ b/src/BuildOnLinux.cmake
@@ -20,7 +20,26 @@ macro(setup_common_libraries)
   list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops)
 endmacro()
 
-if(BUILD_SEPARATE_OPS)
+if(DEBUG_XPU)
+  # One static library per SYCL source, linked into torch_xpu_ops with whole-archive options.
+  setup_common_libraries()
+  foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
+    get_filename_component(name ${sycl_src} NAME_WLE REALPATH)
+    set(sycl_lib torch-xpu-ops-sycl-${name})
+    sycl_add_library(
+      ${sycl_lib}
+      STATIC
+      SYCL_SOURCES ${sycl_src})
+    target_link_libraries(torch_xpu_ops PUBLIC ${sycl_lib})
+    # SHELL: keeps the flag triple together; CMake would otherwise de-duplicate
+    # the repeated --whole-archive/--no-whole-archive options across iterations.
+    target_link_options(torch_xpu_ops PUBLIC
+      "SHELL:-Wl,--whole-archive $<TARGET_FILE:${sycl_lib}> -Wl,--no-whole-archive"
+    )
+    # Register every per-source library, not only the last one of the loop.
+    list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib})
+  endforeach()
+elseif(BUILD_SEPARATE_OPS)
   setup_common_libraries()
   foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
     get_filename_component(name ${sycl_src} NAME_WLE REALPATH)
diff --git a/src/BuildOnWindows.cmake b/src/BuildOnWindows.cmake
index 77bd353d6..956212125 100644
--- a/src/BuildOnWindows.cmake
+++ b/src/BuildOnWindows.cmake
@@ -23,8 +23,35 @@ macro(setup_common_libraries)
   target_link_libraries(torch_xpu_ops_aten PUBLIC torch_cpu)
   target_link_libraries(torch_xpu_ops_aten PUBLIC c10)
 endmacro()
+if(DEBUG_XPU)
+  add_library(
+    torch_xpu_ops
+    STATIC
+    ${ATen_XPU_CPP_SRCS}
+    ${ATen_XPU_MKL_SRCS}
+    ${ATen_XPU_NATIVE_CPP_SRCS}
+    ${ATen_XPU_GEN_SRCS})
+  target_compile_definitions(torch_xpu_ops PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
+  foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
+    get_filename_component(name ${sycl_src} NAME_WLE REALPATH)
+    set(sycl_lib torch-xpu-ops-sycl-${name})
+    sycl_add_library(
+      ${sycl_lib}
+      STATIC
+      SYCL_SOURCES ${sycl_src})
+    target_compile_definitions(${sycl_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
+    list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_lib})
 
-if(BUILD_SEPARATE_OPS)
+    target_link_libraries(torch_xpu_ops
+        PUBLIC
+        ${sycl_lib}
+    )
+    target_link_options(torch_xpu_ops PUBLIC
+        "-WHOLEARCHIVE:$<TARGET_FILE:${sycl_lib}>"
+    )
+  endforeach()
+  list(APPEND TORCH_XPU_OPS_LIBRARIES torch_xpu_ops)
+elseif(BUILD_SEPARATE_OPS)
   setup_common_libraries()
   foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
     get_filename_component(name ${sycl_src} NAME_WLE REALPATH)