Skip to content

Commit 2d82713

Browse files
authored
Merge pull request #820 from intel/sync_msft_01102025
Sync with Microsoft ONNX Runtime - 01/10/2025
2 parents 9d56532 + ce3a033 commit 2d82713

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1031
-303
lines changed

.github/workflows/android.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ jobs:
7171
run: |
7272
set -e -x
7373
BINARY_SIZE_THRESHOLD_ARGS=""
74-
echo "Binary size threshold in bytes: 1722565"
75-
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1722565"
74+
echo "Binary size threshold in bytes: 1436672"
75+
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1436672"
7676
7777
# Ensure ANDROID_NDK_HOME is available and get its real path
7878
if [ -z "$ANDROID_NDK_HOME" ]; then

.github/workflows/macos-ci-build-and-test-workflow.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ jobs:
6262
--build_objc
6363
--build_java
6464
--build_wheel
65+
${{ matrix.target == 'arm64' && '--enable_arm_neon_nchwc' || '' }}
6566
${{ inputs.use_webgpu && '--use_webgpu' || '' }}
6667
${{ inputs.use_xnnpack && '--use_xnnpack' || '' }}
6768
${{ inputs.use_coreml && '--use_coreml --skip_onnx_tests' || '' }}

.github/workflows/react_native.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ jobs:
102102
run: sudo apt-get update && sudo apt-get install -y ninja-build
103103

104104
- name: Download Android AAR artifacts
105-
uses: actions/download-artifact@v4
105+
uses: actions/download-artifact@v5
106106
with:
107107
name: onnxruntime-android-full-aar
108108
path: ${{ runner.temp }}/android-full-aar
@@ -221,7 +221,7 @@ jobs:
221221
uses: actions/checkout@v5
222222

223223
- name: Download iOS pod artifact
224-
uses: actions/download-artifact@v4
224+
uses: actions/download-artifact@v5
225225
with:
226226
name: ios_pod
227227
path: ${{ runner.temp }}/ios_pod
@@ -277,7 +277,7 @@ jobs:
277277
uses: actions/checkout@v5
278278

279279
- name: Download iOS pod artifact
280-
uses: actions/download-artifact@v4
280+
uses: actions/download-artifact@v5
281281
with:
282282
name: ios_pod
283283
path: ${{ runner.temp }}/ios_pod

cmake/CMakeLists.txt

Lines changed: 63 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,9 @@ option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
8888
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
8989
option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
9090
option(onnxruntime_USE_SVE "Build with SVE support in MLAS" OFF)
91+
option(onnxruntime_USE_ARM_NEON_NCHWC "Build with ARM Neon NCHWc kernels in MLAS" OFF)
9192

9293
option(onnxruntime_USE_KLEIDIAI "Build with KleidiAI integration in MLAS" OFF)
93-
# iOS simulator build explicitly builds targets with USE_KLEIDIAI=ON so attempting to force override if so
94-
if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
95-
message(WARNING "Disabling KleidiAI: not supported on Apple x86_64 platforms")
96-
set(onnxruntime_USE_KLEIDIAI OFF CACHE BOOL "" FORCE)
97-
endif()
9894
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
9995
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
10096
option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
@@ -258,6 +254,8 @@ if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_
258254
message(FATAL_ERROR "GCC version must be greater than or equal to 11.1")
259255
endif()
260256

257+
include(detect_onnxruntime_target_platform.cmake)
258+
261259
# ENABLE_TRAINING includes all training functionality
262260
# The following 2 entry points
263261
# 1. ORTModule
@@ -434,14 +432,6 @@ set(ORTTRAINING_SOURCE_DIR ${ORTTRAINING_ROOT}/orttraining)
434432

435433
include(adjust_global_compile_flags.cmake)
436434

437-
if (APPLE)
438-
if (NOT CMAKE_OSX_ARCHITECTURES)
439-
message("Building ONNX Runtime for ${CMAKE_HOST_SYSTEM_PROCESSOR} CPU ARCH")
440-
endif()
441-
elseif (NOT WIN32 AND NOT APPLE)
442-
message("Building ONNX Runtime for ${onnxruntime_target_platform} CPU ARCH")
443-
endif()
444-
445435
# We need to link with libatomic on systems that do not have built-in atomics, or
446436
# don't have built-in support for 8 byte atomics
447437
# Derived from https://github.com/protocolbuffers/protobuf/blob/master/cmake/CMakeLists.txt
@@ -513,6 +503,66 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_ENABLE_PYTHON)
513503
endif()
514504
endif()
515505

506+
if (onnxruntime_USE_ARM_NEON_NCHWC)
507+
message(STATUS "Building MLAS with ARM Neon NCHWc kernels")
508+
endif()
509+
510+
if(onnxruntime_USE_SVE)
511+
if(LINUX AND onnxruntime_target_platform STREQUAL "aarch64")
512+
check_cxx_compiler_flag("-march=armv8.2-a+sve" HAS_ARM64_SVE)
513+
if(HAS_ARM64_SVE)
514+
message(STATUS "Compiler supports SVE!")
515+
else()
516+
message(WARNING "onnxruntime_USE_SVE was set but compiler does not support SVE. It will be disabled.")
517+
set(onnxruntime_USE_SVE OFF)
518+
endif()
519+
else()
520+
message(WARNING "onnxruntime_USE_SVE was set but it is not supported on this platform. It will be disabled.")
521+
set(onnxruntime_USE_SVE OFF)
522+
endif()
523+
endif()
524+
525+
if(onnxruntime_USE_KLEIDIAI)
526+
function(is_kleidiai_supported is_supported_var)
527+
# check for supported target platforms
528+
if(NOT (onnxruntime_target_platform STREQUAL "aarch64" OR
529+
onnxruntime_target_platform STREQUAL "ARM64" OR
530+
onnxruntime_target_platform STREQUAL "arm64"))
531+
message(WARNING "KleidiAI is not supported on this platform.")
532+
533+
set(${is_supported_var} FALSE PARENT_SCOPE)
534+
return()
535+
endif()
536+
537+
# check for compiler support
538+
if(MSVC)
539+
# TODO detect on MSVC
540+
else()
541+
check_cxx_compiler_flag(-march=armv8.2-a+dotprod HAS_ARM64_DOTPROD)
542+
check_cxx_compiler_flag(-march=armv8.2-a+i8mm HAS_ARM64_I8MM)
543+
if(NOT HAS_ARM64_DOTPROD)
544+
message(WARNING "The compiler doesn't support dotprod instructions.")
545+
endif()
546+
if(NOT HAS_ARM64_I8MM)
547+
message(WARNING "The compiler doesn't support i8mm instructions.")
548+
endif()
549+
if(NOT HAS_ARM64_DOTPROD OR NOT HAS_ARM64_I8MM)
550+
set(${is_supported_var} FALSE PARENT_SCOPE)
551+
return()
552+
endif()
553+
endif()
554+
555+
set(${is_supported_var} TRUE PARENT_SCOPE)
556+
endfunction()
557+
558+
is_kleidiai_supported(is_kleidiai_supported_result)
559+
560+
if(NOT is_kleidiai_supported_result)
561+
message(WARNING "onnxruntime_USE_KLEIDIAI was set but it is not supported. It will be disabled.")
562+
set(onnxruntime_USE_KLEIDIAI OFF)
563+
endif()
564+
endif()
565+
516566
#Dependencies begin
517567
get_filename_component(ONNXRUNTIME_ROOT "${ONNXRUNTIME_ROOT}" ABSOLUTE)
518568
get_filename_component(ORTTRAINING_ROOT "${ORTTRAINING_ROOT}" ABSOLUTE)
@@ -663,43 +713,6 @@ else()
663713
endif()
664714
endif()
665715

666-
if(onnxruntime_USE_SVE)
667-
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
668-
check_cxx_compiler_flag("-march=armv8.2-a+sve" HAS_ARM64_SVE)
669-
if(HAS_ARM64_SVE)
670-
message(STATUS "Compiler supports SVE!")
671-
else()
672-
message(WARNING "onnxruntime_USE_SVE was set but compiler does not support SVE. It will be disabled.")
673-
set(onnxruntime_USE_SVE OFF)
674-
endif()
675-
else()
676-
message(WARNING "onnxruntime_USE_SVE was set but it is not supported on this platform. It will be disabled.")
677-
set(onnxruntime_USE_SVE OFF)
678-
endif()
679-
endif()
680-
681-
if (onnxruntime_USE_KLEIDIAI AND (
682-
(onnxruntime_target_platform STREQUAL "aarch64") OR
683-
(onnxruntime_target_platform STREQUAL "ARM64") OR
684-
(APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")))
685-
686-
# TODO Add checks for MSVC Compilation
687-
if(NOT MSVC)
688-
check_cxx_compiler_flag(-march=armv8.2-a+dotprod HAS_ARM64_DOTPROD)
689-
check_cxx_compiler_flag(-march=armv8.2-a+i8mm HAS_ARM64_I8MM)
690-
if (NOT HAS_ARM64_DOTPROD)
691-
message(FATAL_ERROR "The compiler doesn't support dotprod")
692-
endif()
693-
if (NOT HAS_ARM64_I8MM)
694-
message(FATAL_ERROR "The compiler doesn't support i8mm")
695-
endif()
696-
else()
697-
message(STATUS "Skipping -march= checks on MSVC (not supported), assuming dotprod/i8mm support manually.")
698-
set(HAS_ARM64_DOTPROD TRUE)
699-
set(HAS_ARM64_I8MM TRUE)
700-
endif()
701-
endif()
702-
703716
#names in this var must match the directory names under onnxruntime/core/providers
704717
#ONNXRUNTIME_PROVIDER_NAMES is the list of providers that needs to export additional symbols in the global namespace.
705718
#For example CUDA EP exports "OrtSessionOptionsAppendExecutionProvider_CUDA", which is a global function.

cmake/adjust_global_compile_flags.cmake

Lines changed: 11 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -217,30 +217,20 @@ endmacro()
217217
#Set global compile flags for all the source code(including third_party code like protobuf)
218218
#This section must be before any add_subdirectory, otherwise build may fail because /MD,/MT mismatch
219219
if (MSVC)
220-
if (CMAKE_VS_PLATFORM_NAME)
221-
# Multi-platform generator
222-
set(onnxruntime_target_platform ${CMAKE_VS_PLATFORM_NAME})
223-
else()
224-
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
225-
endif()
226-
if (onnxruntime_target_platform STREQUAL "ARM64")
227-
set(onnxruntime_target_platform "ARM64")
228-
enable_language(ASM_MARMASM)
229-
elseif (onnxruntime_target_platform STREQUAL "ARM64EC")
220+
if (onnxruntime_target_platform STREQUAL "ARM64" OR
221+
onnxruntime_target_platform STREQUAL "ARM64EC" OR
222+
onnxruntime_target_platform STREQUAL "ARM")
230223
enable_language(ASM_MARMASM)
231-
elseif (onnxruntime_target_platform STREQUAL "ARM" OR CMAKE_GENERATOR MATCHES "ARM")
232-
set(onnxruntime_target_platform "ARM")
233-
enable_language(ASM_MARMASM)
234-
elseif (onnxruntime_target_platform STREQUAL "x64" OR onnxruntime_target_platform STREQUAL "x86_64" OR onnxruntime_target_platform STREQUAL "AMD64" OR CMAKE_GENERATOR MATCHES "Win64")
235-
set(onnxruntime_target_platform "x64")
236-
enable_language(ASM_MASM)
237-
elseif (onnxruntime_target_platform STREQUAL "Win32" OR onnxruntime_target_platform STREQUAL "x86" OR onnxruntime_target_platform STREQUAL "i386" OR onnxruntime_target_platform STREQUAL "i686")
238-
set(onnxruntime_target_platform "x86")
224+
elseif (onnxruntime_target_platform STREQUAL "x64" OR
225+
onnxruntime_target_platform STREQUAL "x86")
239226
enable_language(ASM_MASM)
240-
message("Enabling SAFESEH for x86 build")
241-
set(CMAKE_ASM_MASM_FLAGS "${CMAKE_ASM_MASM_FLAGS} /safeseh")
227+
228+
if (onnxruntime_target_platform STREQUAL "x86")
229+
message("Enabling SAFESEH for x86 build")
230+
set(CMAKE_ASM_MASM_FLAGS "${CMAKE_ASM_MASM_FLAGS} /safeseh")
231+
endif()
242232
else()
243-
message(FATAL_ERROR "Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
233+
message(FATAL_ERROR "Unsupported onnxruntime_target_platform value: ${onnxruntime_target_platform}")
244234
endif()
245235

246236
#Always enable exception handling, even for Windows ARM
@@ -269,34 +259,6 @@ if (MSVC)
269259
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL")
270260
endif()
271261
else()
272-
if (NOT APPLE)
273-
#XXX: Sometimes the value of CMAKE_SYSTEM_PROCESSOR is set but it's wrong. For example, if you run an armv7 docker
274-
#image on an aarch64 machine with an aarch64 Ubuntu host OS, in the docker instance cmake may still report
275-
# CMAKE_SYSTEM_PROCESSOR as aarch64 by default. Given compiling this code may need more than 2GB memory, we do not
276-
# support compiling for ARM32 natively(only support cross-compiling), we will ignore this issue for now.
277-
if(NOT CMAKE_SYSTEM_PROCESSOR)
278-
message(WARNING "CMAKE_SYSTEM_PROCESSOR is not set. Please set it in your toolchain cmake file.")
279-
# Try to detect it
280-
if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
281-
execute_process(
282-
COMMAND "${CMAKE_C_COMPILER}" -dumpmachine
283-
OUTPUT_VARIABLE GCC_DUMP_MACHINE_OUT OUTPUT_STRIP_TRAILING_WHITESPACE
284-
ERROR_VARIABLE _err
285-
RESULT_VARIABLE _res
286-
)
287-
if(NOT _res EQUAL 0)
288-
message(SEND_ERROR "Failed to run 'gcc -dumpmachine':\n ${_res}")
289-
endif()
290-
string(REPLACE "-" ";" GCC_DUMP_MACHINE_OUT_LIST "${GCC_DUMP_MACHINE_OUT}")
291-
list(LENGTH GCC_DUMP_MACHINE_OUT_LIST GCC_TRIPLET_LEN)
292-
if(GCC_TRIPLET_LEN EQUAL 4)
293-
list(GET GCC_DUMP_MACHINE_OUT_LIST 0 CMAKE_SYSTEM_PROCESSOR)
294-
message("Setting CMAKE_SYSTEM_PROCESSOR to ${CMAKE_SYSTEM_PROCESSOR}")
295-
endif()
296-
endif()
297-
endif()
298-
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
299-
endif()
300262
if (onnxruntime_BUILD_FOR_NATIVE_MACHINE)
301263
string(APPEND CMAKE_CXX_FLAGS " -march=native -mtune=native")
302264
string(APPEND CMAKE_C_FLAGS " -march=native -mtune=native")

cmake/deps.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/de0ce7c7251372892e53c
5151
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
5252
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
5353
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
54-
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.9.2.zip;b7f8dc4a879765127ce31dfeabd31c556c80ec79
54+
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v4.2.1.zip;5d2b21b10478556c5e209dd7229e298a5c9f0b02
5555
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0c12f53da76d0c31b03b9f0f8ec8f3b4.zip;239063aee4946a9af147b473a4c3da78ba7413b4
5656
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
5757
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.12.0.zip;7e733cfdc410d777b76122d64232499205589a96
cmake/detect_onnxruntime_target_platform.cmake

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# This file will set the onnxruntime_target_platform variable, if applicable.
2+
# onnxruntime_target_platform identifies the platform to compile for.
3+
block(PROPAGATE onnxruntime_target_platform)
4+
5+
unset(onnxruntime_target_platform)
6+
7+
if (MSVC)
8+
if (CMAKE_VS_PLATFORM_NAME)
9+
# Multi-platform generator
10+
set(onnxruntime_target_platform ${CMAKE_VS_PLATFORM_NAME})
11+
else()
12+
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
13+
endif()
14+
15+
if (onnxruntime_target_platform STREQUAL "ARM64" OR
16+
onnxruntime_target_platform STREQUAL "ARM64EC")
17+
# Do nothing. We'll just use the current value of onnxruntime_target_platform.
18+
elseif (onnxruntime_target_platform STREQUAL "ARM" OR
19+
CMAKE_GENERATOR MATCHES "ARM")
20+
set(onnxruntime_target_platform "ARM")
21+
elseif (onnxruntime_target_platform STREQUAL "x64" OR
22+
onnxruntime_target_platform STREQUAL "x86_64" OR
23+
onnxruntime_target_platform STREQUAL "AMD64" OR
24+
CMAKE_GENERATOR MATCHES "Win64")
25+
set(onnxruntime_target_platform "x64")
26+
elseif (onnxruntime_target_platform STREQUAL "Win32" OR
27+
onnxruntime_target_platform STREQUAL "x86" OR
28+
onnxruntime_target_platform STREQUAL "i386" OR
29+
onnxruntime_target_platform STREQUAL "i686")
30+
set(onnxruntime_target_platform "x86")
31+
else()
32+
message(FATAL_ERROR "Unknown target platform: ${onnxruntime_target_platform}")
33+
endif()
34+
elseif(APPLE)
35+
if(DEFINED CMAKE_OSX_ARCHITECTURES)
36+
# We'll only set onnxruntime_target_platform when CMAKE_OSX_ARCHITECTURES specifies a single architecture.
37+
list(LENGTH CMAKE_OSX_ARCHITECTURES CMAKE_OSX_ARCHITECTURES_LEN)
38+
if(CMAKE_OSX_ARCHITECTURES_LEN EQUAL 1)
39+
set(onnxruntime_target_platform ${CMAKE_OSX_ARCHITECTURES})
40+
endif()
41+
else()
42+
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
43+
endif()
44+
else()
45+
#XXX: Sometimes the value of CMAKE_SYSTEM_PROCESSOR is set but it's wrong. For example, if you run an armv7 docker
46+
#image on an aarch64 machine with an aarch64 Ubuntu host OS, in the docker instance cmake may still report
47+
# CMAKE_SYSTEM_PROCESSOR as aarch64 by default. Given compiling this code may need more than 2GB memory, we do not
48+
# support compiling for ARM32 natively(only support cross-compiling), we will ignore this issue for now.
49+
if(NOT CMAKE_SYSTEM_PROCESSOR)
50+
message(WARNING "CMAKE_SYSTEM_PROCESSOR is not set. Please set it in your toolchain cmake file.")
51+
# Try to detect it
52+
if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
53+
execute_process(
54+
COMMAND "${CMAKE_C_COMPILER}" -dumpmachine
55+
OUTPUT_VARIABLE GCC_DUMP_MACHINE_OUT
56+
OUTPUT_STRIP_TRAILING_WHITESPACE
57+
ERROR_VARIABLE _err
58+
RESULT_VARIABLE _res
59+
)
60+
if(NOT _res EQUAL 0)
61+
message(SEND_ERROR "Failed to run 'gcc -dumpmachine':\n ${_res}")
62+
endif()
63+
string(REPLACE "-" ";" GCC_DUMP_MACHINE_OUT_LIST "${GCC_DUMP_MACHINE_OUT}")
64+
list(LENGTH GCC_DUMP_MACHINE_OUT_LIST GCC_TRIPLET_LEN)
65+
if(GCC_TRIPLET_LEN EQUAL 4)
66+
list(GET GCC_DUMP_MACHINE_OUT_LIST 0 CMAKE_SYSTEM_PROCESSOR)
67+
message("Setting CMAKE_SYSTEM_PROCESSOR to ${CMAKE_SYSTEM_PROCESSOR}")
68+
endif()
69+
endif()
70+
endif()
71+
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
72+
endif()
73+
74+
if(DEFINED onnxruntime_target_platform)
75+
message(STATUS "onnxruntime_target_platform = ${onnxruntime_target_platform}")
76+
else()
77+
message(WARNING "onnxruntime_target_platform is not set")
78+
endif()
79+
80+
endblock()

cmake/external/cutlass.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ onnxruntime_fetchcontent_declare(
44
URL ${DEP_URL_cutlass}
55
URL_HASH SHA1=${DEP_SHA1_cutlass}
66
EXCLUDE_FROM_ALL
7+
PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1_maybe_unused.patch
78
)
89

910
FetchContent_GetProperties(cutlass)

0 commit comments

Comments
 (0)