From fbb7edf4e01f569bb4dae91bfd121c813aa27681 Mon Sep 17 00:00:00 2001 From: PriscillaJCorn Date: Mon, 27 Apr 2026 05:59:34 +0000 Subject: [PATCH 1/3] [fix] solve avx512 bug --- cmake/onnxruntime_mlas.cmake | 16 ++++++++++++++-- onnxruntime/core/mlas/lib/platform.cpp | 6 +++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake index bde73252449dc..66a3938e88ce2 100644 --- a/cmake/onnxruntime_mlas.cmake +++ b/cmake/onnxruntime_mlas.cmake @@ -774,11 +774,23 @@ else() ${MLAS_SRC_DIR}/rotary_embedding_kernel_avx2.cpp ${MLAS_SRC_DIR}/rotary_embedding_kernel_avx2.cpp ) - if(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 13.1 AND NOT(APPLE)) + + include(CheckCSourceCompiles) + + set(CMAKE_REQUIRED_FLAGS "-mavx512fp16") + check_c_source_compiles(" + int main() { + __asm__ volatile(\"vcvtneeph2ps %ymm0, %ymm1\"); + return 0; + } + " COMPILER_SUPPORTS_AVX512FP16) + + if(COMPILER_SUPPORTS_AVX512FP16 AND NOT APPLE) set(mlas_platform_srcs_avx2 ${mlas_platform_srcs_avx2} ${MLAS_SRC_DIR}/x86_64/cvtfp16Avx.S ) + add_compile_definitions(MLAS_SUPPORTS_AVX512FP16) endif() message(STATUS "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") @@ -997,4 +1009,4 @@ if (NOT onnxruntime_ORT_MINIMAL_BUILD) endif() endif() -endif() \ No newline at end of file +endif() diff --git a/onnxruntime/core/mlas/lib/platform.cpp b/onnxruntime/core/mlas/lib/platform.cpp index e9f140a2ee0f7..1c295799541b2 100644 --- a/onnxruntime/core/mlas/lib/platform.cpp +++ b/onnxruntime/core/mlas/lib/platform.cpp @@ -527,14 +527,14 @@ Return Value: } #ifndef __APPLE__ -#if (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13)) +#if defined(MLAS_SUPPORTS_AVX512FP16) // // Check if the processor supports AVX NE CONVERT. 
// if ((Cpuid7_1[3] & (0b1 << 5)) != 0) { this->CastF16ToF32Kernel = &MlasCastF16ToF32KernelAvx; } -#endif // (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13)) +#endif // MLAS_SUPPORTS_AVX512FP16 // @@ -671,7 +671,7 @@ Return Value: } else{ this->ErfFP16KernelRoutine = MlasNeonErfFP16Kernel; - this->GeluFP16KernelRoutine = MlasNeonGeluFP16Kernel; + this->GeluFP16KernelRoutine = MlasNeonGeluFP16Kernel; } #else this->ErfFP16KernelRoutine = MlasNeonErfFP16Kernel; From 6a61fe8419346cc1d75e2dd5f579a97be3d65c97 Mon Sep 17 00:00:00 2001 From: PriscillaJCorn <152854313+PriscillaJCorn@users.noreply.github.com> Date: Tue, 28 Apr 2026 10:49:32 +0800 Subject: [PATCH 2/3] Update cmake/onnxruntime_mlas.cmake Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- cmake/onnxruntime_mlas.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake index 66a3938e88ce2..73b0a28ef3463 100644 --- a/cmake/onnxruntime_mlas.cmake +++ b/cmake/onnxruntime_mlas.cmake @@ -777,13 +777,19 @@ else() include(CheckCSourceCompiles) - set(CMAKE_REQUIRED_FLAGS "-mavx512fp16") + set(MLAS_OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + if(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx512fp16") + else() + set(CMAKE_REQUIRED_FLAGS "-mavx512fp16") + endif() check_c_source_compiles(" int main() { __asm__ volatile(\"vcvtneeph2ps %ymm0, %ymm1\"); return 0; } " COMPILER_SUPPORTS_AVX512FP16) + set(CMAKE_REQUIRED_FLAGS "${MLAS_OLD_CMAKE_REQUIRED_FLAGS}") if(COMPILER_SUPPORTS_AVX512FP16 AND NOT APPLE) set(mlas_platform_srcs_avx2 From d8cb7edb4038315076d96d27db26f0e5777a4014 Mon Sep 17 00:00:00 2001 From: PriscillaJCorn <152854313+PriscillaJCorn@users.noreply.github.com> Date: Tue, 28 Apr 2026 11:20:20 +0800 Subject: [PATCH 3/3] Update cmake/onnxruntime_mlas.cmake Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- 
cmake/onnxruntime_mlas.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake index 73b0a28ef3463..77b18390b6afd 100644 --- a/cmake/onnxruntime_mlas.cmake +++ b/cmake/onnxruntime_mlas.cmake @@ -796,7 +796,7 @@ else() ${mlas_platform_srcs_avx2} ${MLAS_SRC_DIR}/x86_64/cvtfp16Avx.S ) - add_compile_definitions(MLAS_SUPPORTS_AVX512FP16) + list(APPEND mlas_private_compile_definitions MLAS_SUPPORTS_AVX512FP16) endif() message(STATUS "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")